In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that flattens its final feature map.

    Six Conv2d layers (2x2 kernel, stride 2, no padding) are applied in a row,
    each followed by an ELU. Channel widths go 3 -> 32 -> 32 -> 64 -> 128 ->
    256 -> 512, and the last activation is flattened per sample.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering the first convolution, then leaving each one.
        widths = (3, 32, 32, 64, 128, 256, 512)

        stages = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=2, stride=2, padding=0))
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # Everything lives in a single Sequential named `cnn`, so parameters
        # keep the names cnn.0.*, cnn.2.*, ..., cnn.10.*.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the convolutional stack to `x` and return the flattened output."""
        flat_features = self.cnn(x)
        return flat_features

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: a pretrained VisualEncoder feeding two linear heads.

    The encoder turns an image observation into a feature vector.
    `action_head` maps that vector to one output per discrete action and
    `value_head` maps it to a single value estimate, which is cached so that
    `value_function()` can return it after `forward()`.
    """

    # Checkpoint loaded when no explicit path is given.
    # NOTE(review): "weigths" looks like a typo but presumably matches the file
    # on disk, so the path string is kept unchanged.
    DEFAULT_ENCODER_WEIGHTS = "/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth"

    def __init__(self, obs_space, action_space, num_outputs, model_config, name,
                 encoder_weights_path=None):
        """Build the encoder and heads and load the pretrained encoder weights.

        Args:
            obs_space: Observation space, forwarded to TorchModelV2.
            action_space: Action space; must expose `.n` (number of discrete
                actions), which sizes the policy head.
            num_outputs: Forwarded to TorchModelV2.
            model_config: Model config dict, forwarded to TorchModelV2.
            name: Model name, forwarded to TorchModelV2.
            encoder_weights_path: Optional path of the encoder state dict.
                Defaults to `DEFAULT_ENCODER_WEIGHTS`. RLlib passes entries of
                `custom_model_config` to the constructor as keyword arguments,
                so this can be set from the trainer config.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        # The encoder ends in 512 channels followed by Flatten, so this is only
        # correct when the final feature map is 1x1 (e.g. 64x64 inputs)
        # -- TODO confirm against the environment's observation size.
        features_dim = 512

        if encoder_weights_path is None:
            encoder_weights_path = self.DEFAULT_ENCODER_WEIGHTS

        self.encoder = VisualEncoder()
        # Load onto CPU first; the modules are moved to the GPU below if one is available.
        self.encoder.load_state_dict(
            torch.load(encoder_weights_path, map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Value estimate from the most recent forward(); None until forward() runs.
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action outputs for a batch of observations.

        Args:
            input_dict: Dict whose 'obs' entry is a 4-D tensor with channels
                on the last axis (it is permuted with (0, 3, 1, 2)).
            state: Passed through unchanged.
            seq_lens: Unused.

        Returns:
            Tuple `(action_outputs, state)`. The value estimate for the same
            batch is stored in `self.last_value`.
        """
        # Tensor.cuda()/.to() return a new tensor rather than moving in place,
        # so the result must be kept. Following the device of the model's own
        # parameters avoids any device mismatch between input and weights.
        device = self.action_head.weight.device
        obs = input_dict['obs'].to(device)

        # Move the last axis to position 1 (the encoder's first Conv2d expects
        # 3 channels there) and scale by 1/255
        # (assumes 0..255 pixel values -- TODO confirm).
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        features = self.encoder(obs)
        action = self.action_head(features)
        # value_head yields shape (batch, 1); drop the trailing axis.
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimates cached by the most recent `forward()` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the name "my_torch_model"; the trainer config
# selects it through its "custom_model" entry.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
# Restrict this process to one GPU via CUDA's environment variables.
# NOTE(review): these only take effect if set before CUDA is initialised in
# this process -- confirm nothing in the earlier (star) imports touches CUDA.
# NOTE(review): "issue #152" does not say which tracker it refers to; add a link.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"   # expose only device index 0

def env_creator(env_config):
    """Create the wrapped 'IGLUSilentBuilder-v0' environment.

    Args:
        env_config: Mapping of environment options (may be empty or None).
            Recognised keys, both optional:
              - "max_steps": passed to `gym.make` (default 1000).
              - "task_preset": preset list given to `TaskSet` (default ['C8']).
            With no keys set, the environment is built exactly as before.

    Returns:
        The environment wrapped, in order, by PovOnlyWrapper, SelectAndPlace
        and Discretization over flat_action_space('human-level').
    """
    # Previously env_config was ignored and both values were hard-coded.
    options = env_config or {}
    max_steps = options.get("max_steps", 1000)
    task_preset = options.get("task_preset", ['C8'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    return env

from ray.tune.registry import register_env
# Register env_creator under the name "my_env", which the trainer config
# refers to in its "env" entry.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Model section of the trainer config.
ppo_model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# Weights & Biases run metadata (presumably consumed by WandbLogger).
wandb_run_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO C8 pretrained (AngelaCNN) (3 noops after placement)",
}

# Full PPO trainer configuration handed to Tune.
ppo_run_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": ppo_model_settings,
    "logger_config": {"wandb": wandb_run_settings},
}

# Launch the PPO run; no stop criterion is passed to tune.run here.
analysis = tune.run(
    PPOTrainer,
    config=ppo_run_config,
    loggers=[WandbLogger],
)

2021-10-08 22:23:55,233	INFO wandb.py:170 -- Already logged into W&B.
2021-10-08 22:23:55,254	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_6c5b8_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=40112)[0m 2021-10-08 22:23:58,754	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=40112)[0m 2021-10-08 22:23:58,754	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-08_22-25-09
  done: false
  episode_len_mean: 413.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -7.0
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.680601273642646
          entropy_coeff: 0.009999999999999998
          kl: 0.01061723793506031
          policy_loss: 0.014398261490795348
          total_loss: 0.3600214495841
          vf_explained_var: -0.008035365492105484
          vf_loss: 0.3703057582800587
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.3.5
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1,65.3641,1000,-7,0,-14,413


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-08_22-25-30
  done: false
  episode_len_mean: 403.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -3.5
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7041153642866345
          entropy_coeff: 0.009999999999999998
          kl: 0.011423616325345927
          policy_loss: 0.014449085998866294
          total_loss: 0.007905774811903635
          vf_explained_var: 0.19078736007213593
          vf_loss: 0.018213119099123612
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2,86.2017,2000,-3.5,0,-14,403.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-08_22-25-52
  done: false
  episode_len_mean: 391.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -2.0
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 7
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.712230372428894
          entropy_coeff: 0.009999999999999998
          kl: 0.011001561227270142
          policy_loss: -0.0054480022440354025
          total_loss: -0.020296060707834032
          vf_explained_var: 0.32518404722213745
          vf_loss: 0.010073932490518524
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,3,108.266,3000,-2,0,-14,391


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-08_22-26-13
  done: false
  episode_len_mean: 387.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.4
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 10
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6923079437679713
          entropy_coeff: 0.009999999999999998
          kl: 0.0049523883530629064
          policy_loss: -0.11970868996447986
          total_loss: -0.13817452821466658
          vf_explained_var: 0.4264937937259674
          vf_loss: 0.007466761967063778
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,4,129.081,4000,-1.4,0,-14,387.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-08_22-26-36
  done: false
  episode_len_mean: 386.6666666666667
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -2.0833333333333335
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 12
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5850550254185993
          entropy_coeff: 0.009999999999999998
          kl: 0.010939156647359245
          policy_loss: -0.27721503434909717
          total_loss: -0.17691919638050926
          vf_explained_var: 0.4083431661128998
          vf_loss: 0.1250524717486567
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,5,151.558,5000,-2.08333,0,-14,386.667


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-08_22-26-59
  done: false
  episode_len_mean: 381.06666666666666
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -3.466666666666667
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 15
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.628725830713908
          entropy_coeff: 0.009999999999999998
          kl: 0.0114636253725734
          policy_loss: 0.02687674214442571
          total_loss: 0.1786295685503218
          vf_explained_var: 0.42652538418769836
          vf_loss: 0.17689372335250178
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,6,175.237,6000,-3.46667,0,-17,381.067


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-08_22-27-23
  done: false
  episode_len_mean: 374.77777777777777
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -2.888888888888889
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 18
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6143610742357044
          entropy_coeff: 0.009999999999999998
          kl: 0.01075896448842452
          policy_loss: -0.0766780694325765
          total_loss: -0.08892696193522877
          vf_explained_var: -0.049227189272642136
          vf_loss: 0.012818821892142295
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,7,198.719,7000,-2.88889,0,-17,374.778


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-08_22-27-45
  done: false
  episode_len_mean: 372.14285714285717
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -2.4761904761904763
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 21
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6095904615190295
          entropy_coeff: 0.009999999999999998
          kl: 0.010107885834288637
          policy_loss: -0.02073970263202985
          total_loss: -0.04199816944698493
          vf_explained_var: 0.07559360563755035
          vf_loss: 0.003826649426223917
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,8,220.946,8000,-2.47619,0,-17,372.143


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-08_22-28-06
  done: false
  episode_len_mean: 373.625
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -2.1666666666666665
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 24
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.601945095592075
          entropy_coeff: 0.009999999999999998
          kl: 0.011204313276243048
          policy_loss: -0.06529171864191691
          total_loss: -0.084709133207798
          vf_explained_var: 0.7706239223480225
          vf_loss: 0.005481600044812593
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,9,241.324,9000,-2.16667,0,-17,373.625


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-08_22-28-26
  done: false
  episode_len_mean: 373.46153846153845
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -2.0
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 26
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.635689157909817
          entropy_coeff: 0.009999999999999998
          kl: 0.013581097363524464
          policy_loss: -0.017410087254312305
          total_loss: -0.04024956731332673
          vf_explained_var: 0.3289504945278168
          vf_loss: 0.0021593012834071287
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,10,261.373,10000,-2,0,-17,373.462


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-08_22-28-46
  done: false
  episode_len_mean: 375.62068965517244
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.793103448275862
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 29
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.635822730594211
          entropy_coeff: 0.009999999999999998
          kl: 0.009701815019438323
          policy_loss: -0.05053257221976916
          total_loss: -0.07218241954429282
          vf_explained_var: 0.022930027917027473
          vf_loss: 0.0037382007524785067
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,11,281.865,11000,-1.7931,0,-17,375.621




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-08_22-29-28
  done: false
  episode_len_mean: 373.21875
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.625
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 32
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6131074031194053
          entropy_coeff: 0.009999999999999998
          kl: 0.010199062259604751
          policy_loss: -0.07006435907549328
          total_loss: -0.09337392275532087
          vf_explained_var: 0.5852172374725342
          vf_loss: 0.001801604888169095
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,12,323.491,12000,-1.625,0,-17,373.219


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-08_22-29-53
  done: false
  episode_len_mean: 372.61764705882354
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.5294117647058822
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 34
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.614541522661845
          entropy_coeff: 0.009999999999999998
          kl: 0.0134476212793186
          policy_loss: -0.001459013836251365
          total_loss: -0.02531754488332404
          vf_explained_var: -0.3224055767059326
          vf_loss: 0.0009421206816720466
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,13,348.517,13000,-1.52941,0,-17,372.618


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-08_22-30-17
  done: false
  episode_len_mean: 371.7837837837838
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.4054054054054055
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 37
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6388224363327026
          entropy_coeff: 0.009999999999999998
          kl: 0.011835492374769933
          policy_loss: -0.05227134115993977
          total_loss: -0.07663661326385207
          vf_explained_var: -0.6752870678901672
          vf_loss: 0.0008394020221506556
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,14,372.409,14000,-1.40541,0,-17,371.784


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-08_22-30-40
  done: false
  episode_len_mean: 370.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.3
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 40
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6005810684627955
          entropy_coeff: 0.009999999999999998
          kl: 0.01132735477680331
          policy_loss: -0.05408719020585219
          total_loss: -0.0753342761968573
          vf_explained_var: -0.519787073135376
          vf_loss: 0.003625988982902426
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,15,396.067,15000,-1.3,0,-17,370.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-08_22-31-04
  done: false
  episode_len_mean: 371.5813953488372
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.2093023255813953
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 43
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6447353839874266
          entropy_coeff: 0.009999999999999998
          kl: 0.012680854671561331
          policy_loss: 0.024207663287719092
          total_loss: 0.0008783184819751316
          vf_explained_var: -0.19496801495552063
          vf_loss: 0.0018499237588710255
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,16,419.46,16000,-1.2093,0,-17,371.581


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-08_22-31-26
  done: false
  episode_len_mean: 372.93333333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.1555555555555554
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 45
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4721456474728054
          entropy_coeff: 0.009999999999999998
          kl: 0.016984153149558825
          policy_loss: -0.10765206269505952
          total_loss: -0.1281614815402362
          vf_explained_var: -0.7414586544036865
          vf_loss: 0.002513618884323579
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,17,441.24,17000,-1.15556,0,-17,372.933


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-08_22-31-48
  done: false
  episode_len_mean: 375.70212765957444
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.1063829787234043
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 47
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4722130060195924
          entropy_coeff: 0.009999999999999998
          kl: 0.01085364661675315
          policy_loss: -0.007267607707116339
          total_loss: -0.029482085971782604
          vf_explained_var: 0.062245871871709824
          vf_loss: 0.0014222838362911716
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,18,463.174,18000,-1.10638,0,-17,375.702


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-08_22-32-10
  done: false
  episode_len_mean: 376.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.04
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 50
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5329273541768393
          entropy_coeff: 0.009999999999999998
          kl: 0.011140066504559175
          policy_loss: -0.12728225539127985
          total_loss: -0.1502650378064977
          vf_explained_var: -0.4623890221118927
          vf_loss: 0.0012324840250787221
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,19,486.037,19000,-1.04,0,-17,376.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-08_22-32-33
  done: false
  episode_len_mean: 377.9807692307692
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.0
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 52
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.548286294937134
          entropy_coeff: 0.009999999999999998
          kl: 0.013046762194523973
          policy_loss: -0.06970301907923486
          total_loss: -0.09208315727818343
          vf_explained_var: -0.7936792373657227
          vf_loss: 0.0017980472643911425
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,20,508.389,20000,-1,0,-17,377.981


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-08_22-32-56
  done: false
  episode_len_mean: 378.56363636363636
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.9454545454545454
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 55
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5536413616604277
          entropy_coeff: 0.009999999999999998
          kl: 0.01849438074781581
          policy_loss: -0.02023462677995364
          total_loss: -0.04268074579950836
          vf_explained_var: -0.7770184874534607
          vf_loss: 0.0012408539161293042
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,21,531.34,21000,-0.945455,0,-17,378.564


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-08_22-33-17
  done: false
  episode_len_mean: 379.6140350877193
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.9122807017543859
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 57
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.559387217627631
          entropy_coeff: 0.009999999999999998
          kl: 0.012527011233966507
          policy_loss: -0.0698491820341183
          total_loss: -0.0933192849987083
          vf_explained_var: -0.8817660212516785
          vf_loss: 0.0008710692507318325
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,22,552.468,22000,-0.912281,0,-17,379.614




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-08_22-33-57
  done: false
  episode_len_mean: 380.76666666666665
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8666666666666667
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 60
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3951367881562975
          entropy_coeff: 0.009999999999999998
          kl: 0.014485169243183629
          policy_loss: -0.10667750512560209
          total_loss: -0.12803563094801373
          vf_explained_var: -0.44949740171432495
          vf_loss: 0.0011447275451953627
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,23,592.382,23000,-0.866667,0,-17,380.767


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-08_22-34-18
  done: false
  episode_len_mean: 381.46774193548384
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8387096774193549
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 62
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.496274079216851
          entropy_coeff: 0.009999999999999998
          kl: 0.015583504985882584
          policy_loss: -0.07586543711109293
          total_loss: -0.09826565113746458
          vf_explained_var: -0.7701025009155273
          vf_loss: 0.0010041757059904436
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,24,613.617,24000,-0.83871,0,-17,381.468


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-08_22-34-38
  done: false
  episode_len_mean: 382.8615384615385
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 65
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4371846834818522
          entropy_coeff: 0.009999999999999998
          kl: 0.018685641318178212
          policy_loss: -0.09470518392821153
          total_loss: -0.11544515217343966
          vf_explained_var: -0.4645217955112457
          vf_loss: 0.0017633148540173553
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,25,633.42,25000,-0.8,0,-17,382.862


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-08_22-34-58
  done: false
  episode_len_mean: 383.6268656716418
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7761194029850746
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 67
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4871999184290567
          entropy_coeff: 0.009999999999999998
          kl: 0.018716439800419465
          policy_loss: -0.06255637533548805
          total_loss: -0.08437145472400719
          vf_explained_var: 0.023002522066235542
          vf_loss: 0.001185278164403927
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,26,653.412,26000,-0.776119,0,-17,383.627


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-08_22-35-19
  done: false
  episode_len_mean: 383.9428571428571
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7428571428571429
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 70
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.566101352373759
          entropy_coeff: 0.009999999999999998
          kl: 0.015523474489890443
          policy_loss: -0.06829116882549392
          total_loss: -0.09121759351756838
          vf_explained_var: -0.4914383292198181
          vf_loss: 0.001182238227597231
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,27,674.063,27000,-0.742857,0,-17,383.943


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-08_22-35-39
  done: false
  episode_len_mean: 384.69444444444446
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7222222222222222
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 72
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.608701629108853
          entropy_coeff: 0.009999999999999998
          kl: 0.011708169585458172
          policy_loss: -0.0687175270790855
          total_loss: -0.09304758821510606
          vf_explained_var: -0.8399937748908997
          vf_loss: 0.0005861399261953516
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,28,694.369,28000,-0.722222,0,-17,384.694


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-08_22-35-58
  done: false
  episode_len_mean: 386.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6933333333333334
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 75
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4992286205291747
          entropy_coeff: 0.009999999999999998
          kl: 0.014536042906965098
          policy_loss: -0.06283370997342798
          total_loss: -0.08545535827676455
          vf_explained_var: -0.5450249314308167
          vf_loss: 0.0009170322717788319
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,29,713.587,29000,-0.693333,0,-17,386.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-08_22-36-19
  done: false
  episode_len_mean: 386.42857142857144
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6753246753246753
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 77
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.630199909210205
          entropy_coeff: 0.009999999999999998
          kl: 0.011107268519612069
          policy_loss: -0.03630538102653291
          total_loss: -0.06077079754322767
          vf_explained_var: -0.950014054775238
          vf_loss: 0.0007258561769655595
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,30,734.09,30000,-0.675325,0,-17,386.429


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-08_22-36-40
  done: false
  episode_len_mean: 386.2125
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.65
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 80
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.658619565433926
          entropy_coeff: 0.009999999999999998
          kl: 0.015103961210637085
          policy_loss: 0.03513341347376506
          total_loss: 0.010536973592307833
          vf_explained_var: -0.9966903924942017
          vf_loss: 0.0004793607882068803
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore: 31
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,31,755.14,31000,-0.65,0,-17,386.212


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-08_22-37-00
  done: false
  episode_len_mean: 386.5243902439024
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6341463414634146
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 82
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.686210608482361
          entropy_coeff: 0.009999999999999998
          kl: 0.012015979229522916
          policy_loss: -0.12404515995747513
          total_loss: -0.14929976508849196
          vf_explained_var: -0.9782360792160034
          vf_loss: 0.00040590175906092757
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,32,775.806,32000,-0.634146,0,-17,386.524


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-08_22-37-20
  done: false
  episode_len_mean: 387.52941176470586
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.611764705882353
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 85
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5826979928546483
          entropy_coeff: 0.009999999999999998
          kl: 0.01722975554216212
          policy_loss: -0.05468614548444748
          total_loss: -0.07755304864711232
          vf_explained_var: -0.24033547937870026
          vf_loss: 0.0012371004458853147
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,33,795.372,33000,-0.611765,0,-17,387.529


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-08_22-37-42
  done: false
  episode_len_mean: 387.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5977011494252874
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 87
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5476075516806707
          entropy_coeff: 0.009999999999999998
          kl: 0.01836907026683663
          policy_loss: -0.09020456025997797
          total_loss: -0.1127914467619525
          vf_explained_var: -0.490717351436615
          vf_loss: 0.0010522794579931845
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 34


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,34,817.777,34000,-0.597701,0,-17,387




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-08_22-38-19
  done: false
  episode_len_mean: 387.0111111111111
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5777777777777777
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 90
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5853884776433307
          entropy_coeff: 0.009999999999999998
          kl: 0.017879716901293422
          policy_loss: -0.06773135866969823
          total_loss: -0.09077360964276725
          vf_explained_var: -0.5233157873153687
          vf_loss: 0.0010236616670024684
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,35,854.375,35000,-0.577778,0,-17,387.011


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-08_22-38-41
  done: false
  episode_len_mean: 386.70652173913044
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5652173913043478
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 92
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6456380711661445
          entropy_coeff: 0.009999999999999998
          kl: 0.015617868499640114
          policy_loss: -0.08553327309588592
          total_loss: -0.10979659797416794
          vf_explained_var: -0.7102473378181458
          vf_loss: 0.0006312691988164766
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,36,876.636,36000,-0.565217,0,-17,386.707


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-08_22-39-04
  done: false
  episode_len_mean: 387.09473684210525
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5473684210526316
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 95
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.667511839336819
          entropy_coeff: 0.009999999999999998
          kl: 0.012422356582159861
          policy_loss: -0.0011240775179531838
          total_loss: -0.026105871413730913
          vf_explained_var: -1.0
          vf_loss: 0.0004510884192616989
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,37,899.086,37000,-0.547368,0,-17,387.095


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-08_22-39-26
  done: false
  episode_len_mean: 387.2959183673469
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5306122448979592
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 98
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.622952175140381
          entropy_coeff: 0.009999999999999998
          kl: 0.010853651994056385
          policy_loss: -0.03601161775489648
          total_loss: -0.06041895767880811
          vf_explained_var: -0.9108760356903076
          vf_loss: 0.0007368141736757631
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,38,920.895,38000,-0.530612,0,-17,387.296


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-08_22-39-46
  done: false
  episode_len_mean: 387.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 100
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5986106210284765
          entropy_coeff: 0.009999999999999998
          kl: 0.01661568215516874
          policy_loss: -0.02140029768149058
          total_loss: -0.044921852896610895
          vf_explained_var: -0.2667360007762909
          vf_loss: 0.0008029805949061281
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,39,941.664,39000,-0.52,0,-17,387.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-08_22-40-08
  done: false
  episode_len_mean: 387.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.38
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 103
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6624467266930476
          entropy_coeff: 0.009999999999999998
          kl: 0.009223860680644878
          policy_loss: 0.00099721137020323
          total_loss: -0.024099063273105355
          vf_explained_var: -0.918176531791687
          vf_loss: 0.0006058074860549015
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,40,963.506,40000,-0.38,0,-17,387.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-08_22-40-30
  done: false
  episode_len_mean: 387.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.38
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 105
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5835703955756295
          entropy_coeff: 0.009999999999999998
          kl: 0.01743495212531327
          policy_loss: -0.052833822286791274
          total_loss: -0.07544109308057362
          vf_explained_var: -0.28782907128334045
          vf_loss: 0.0014849387794836529
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,41,985.325,41000,-0.38,0,-17,387.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-08_22-40-51
  done: false
  episode_len_mean: 388.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.38
  episode_reward_min: -17.0
  episodes_this_iter: 3
  episodes_total: 108
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4979066689809164
          entropy_coeff: 0.009999999999999998
          kl: 0.02046562414399849
          policy_loss: -0.04090639551480611
          total_loss: -0.061387385345167585
          vf_explained_var: -0.25441238284111023
          vf_loss: 0.002451514133847215
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,42,1006.04,42000,-0.38,0,-17,388.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-08_22-41-12
  done: false
  episode_len_mean: 388.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.38
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 110
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5271907329559324
          entropy_coeff: 0.009999999999999998
          kl: 0.018770437620686406
          policy_loss: -0.05362330118401183
          total_loss: -0.07306673582643271
          vf_explained_var: -0.19380487501621246
          vf_loss: 0.0030129061009372686
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,43,1026.73,43000,-0.38,0,-17,388.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-08_22-41-32
  done: false
  episode_len_mean: 389.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 113
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5785748455259534
          entropy_coeff: 0.009999999999999998
          kl: 0.01624408711602336
          policy_loss: -0.0637845783183972
          total_loss: -0.08556575328111649
          vf_explained_var: -0.12452713400125504
          vf_loss: 0.0015679610534183061
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 44
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,44,1046.76,44000,-0.1,0,-10,389.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-08_22-41-51
  done: false
  episode_len_mean: 391.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 115
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.522672947247823
          entropy_coeff: 0.009999999999999998
          kl: 0.01764136477605436
          policy_loss: -0.06960918667415777
          total_loss: -0.09047831859853532
          vf_explained_var: -0.44417473673820496
          vf_loss: 0.0017113913380954829
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  node_ip: 192.168.3.5
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,45,1065.67,45000,0,0,0,391.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-08_22-42-09
  done: false
  episode_len_mean: 393.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 117
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.540082958009508
          entropy_coeff: 0.009999999999999998
          kl: 0.016898701841503043
          policy_loss: -0.07278268227560653
          total_loss: -0.09388455287035968
          vf_explained_var: -0.9262400269508362
          vf_loss: 0.0017641552413503328
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 46
  node_ip: 192.168.3.5
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,46,1083.86,46000,0,0,0,393.75




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-08_22-42-47
  done: false
  episode_len_mean: 393.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 120
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4463132063547772
          entropy_coeff: 0.009999999999999998
          kl: 0.015033439551171618
          policy_loss: -0.09370526192295882
          total_loss: -0.10145690240379837
          vf_explained_var: -0.4916439950466156
          vf_loss: 0.01445647166433951
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
  node_ip: 192.168.3.5
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,47,1122.42,47000,0,0,0,393.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-08_22-43-10
  done: false
  episode_len_mean: 393.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 123
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5365313741895887
          entropy_coeff: 0.009999999999999998
          kl: 0.01686864928592978
          policy_loss: -0.06986109217007955
          total_loss: -0.0914964959025383
          vf_explained_var: -0.09645112603902817
          vf_loss: 0.0011996086389343772
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
  node_ip: 192.168.3.5
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,48,1145.17,48000,0,0,0,393.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-08_22-43-32
  done: false
  episode_len_mean: 394.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 125
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.426044872072008
          entropy_coeff: 0.009999999999999998
          kl: 0.016868076479581114
          policy_loss: -0.041247892296976514
          total_loss: -0.06191746596660879
          vf_explained_var: -1.0
          vf_loss: 0.0010606636344972584
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49
  node_ip: 192.168.3.5
  num_healthy_worker

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,49,1166.62,49000,0,0,0,394.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-08_22-43-52
  done: false
  episode_len_mean: 394.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 128
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4419408904181585
          entropy_coeff: 0.009999999999999998
          kl: 0.008980304758825386
          policy_loss: -0.3544030401441786
          total_loss: -0.37630459003978306
          vf_explained_var: -0.8803291320800781
          vf_loss: 0.0011708113972821997
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,50,1186.94,50000,-0.08,0,-8,394.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-08_22-44-15
  done: false
  episode_len_mean: 394.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 130
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.407989101939731
          entropy_coeff: 0.009999999999999998
          kl: 0.009827401258365873
          policy_loss: -0.03846276226556963
          total_loss: -0.04668629862782028
          vf_explained_var: -0.614568293094635
          vf_loss: 0.014382244200937243
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  node_ip: 192.168.3.5
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,51,1209.97,51000,-0.08,0,-8,394.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-08_22-44-35
  done: false
  episode_len_mean: 395.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 133
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3619123591317073
          entropy_coeff: 0.009999999999999998
          kl: 0.013206466352141118
          policy_loss: -0.07736671273079183
          total_loss: -0.09749220290945636
          vf_explained_var: -0.3794724643230438
          vf_loss: 0.0015126621227763179
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,52,1229.61,52000,-0.08,0,-8,395.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-08_22-44-56
  done: false
  episode_len_mean: 397.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 135
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3009563552008734
          entropy_coeff: 0.009999999999999998
          kl: 0.01394128465326344
          policy_loss: -0.051993075675434534
          total_loss: -0.07145610244737731
          vf_explained_var: -0.7146230340003967
          vf_loss: 0.001455345193648504
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_restore: 53
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,53,1251.34,53000,-0.08,0,-8,397.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-08_22-45-19
  done: false
  episode_len_mean: 395.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 138
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2666912847095064
          entropy_coeff: 0.009999999999999998
          kl: 0.02542227248930508
          policy_loss: -0.09439502788914575
          total_loss: -0.11237685908046034
          vf_explained_var: -0.4457489550113678
          vf_loss: 0.0008717388354448809
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 54
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,54,1273.54,54000,-0.08,0,-8,395.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-08_22-45-42
  done: false
  episode_len_mean: 396.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 141
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3058651420805187
          entropy_coeff: 0.009999999999999998
          kl: 0.016118944485375684
          policy_loss: -0.08837933169884814
          total_loss: -0.10684336293488741
          vf_explained_var: -0.013087103143334389
          vf_loss: 0.0009678579336549673
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 55
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,55,1297.27,55000,-0.08,0,-8,396.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-08_22-46-06
  done: false
  episode_len_mean: 395.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 144
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.298864730199178
          entropy_coeff: 0.009999999999999998
          kl: 0.014537312638043767
          policy_loss: -0.028751073363754483
          total_loss: -0.04745924212038517
          vf_explained_var: -0.25435295701026917
          vf_loss: 0.001009583681783018
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_restore: 56
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,56,1320.75,56000,-0.08,0,-8,395.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-08_22-46-28
  done: false
  episode_len_mean: 394.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 146
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2099067436324225
          entropy_coeff: 0.009999999999999998
          kl: 0.017072885505767053
          policy_loss: -0.06300954417222077
          total_loss: -0.07994554684393936
          vf_explained_var: -0.5519909262657166
          vf_loss: 0.0013216621772800055
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 57
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,57,1342.48,57000,-0.08,0,-8,394.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-08_22-46-50
  done: false
  episode_len_mean: 393.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 149
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.114379631148444
          entropy_coeff: 0.009999999999999998
          kl: 0.01771749833097696
          policy_loss: -0.03492727271384663
          total_loss: -0.05054569612774584
          vf_explained_var: -0.7359665036201477
          vf_loss: 0.0015389356891521149
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,58,1365.01,58000,-0.08,0,-8,393.58




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-08_22-47-30
  done: false
  episode_len_mean: 391.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 152
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3285401582717897
          entropy_coeff: 0.009999999999999998
          kl: 0.012710146729502114
          policy_loss: -0.04497535799940427
          total_loss: -0.06441238104469246
          vf_explained_var: -1.0
          vf_loss: 0.000988596663874988
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
  node_ip: 192.168.3.5
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,59,1404.61,59000,-0.08,0,-8,391.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-08_22-47-51
  done: false
  episode_len_mean: 391.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 155
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.291260642475552
          entropy_coeff: 0.009999999999999998
          kl: 0.012280650121989003
          policy_loss: 0.012815483897510502
          total_loss: -0.006516151585512691
          vf_explained_var: -0.9663519859313965
          vf_loss: 0.0008178246983637413
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,60,1426.11,60000,-0.08,0,-8,391.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-08_22-48-14
  done: false
  episode_len_mean: 390.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 157
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.412110437287225
          entropy_coeff: 0.009999999999999998
          kl: 0.012104715264097779
          policy_loss: -0.019125826905171076
          total_loss: -0.04001436848193407
          vf_explained_var: -0.8404028415679932
          vf_loss: 0.0005090029056494435
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,61,1449.24,61000,-0.08,0,-8,390.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-08_22-48-37
  done: false
  episode_len_mean: 389.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 160
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.126803149117364
          entropy_coeff: 0.009999999999999998
          kl: 0.014495306281032327
          policy_loss: -0.0317774824384186
          total_loss: -0.04861324250491129
          vf_explained_var: -0.8901798725128174
          vf_loss: 0.001170826096010084
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_restore: 62
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,62,1471.45,62000,-0.08,0,-8,389.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-08_22-49-01
  done: false
  episode_len_mean: 387.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 163
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2664572265413074
          entropy_coeff: 0.009999999999999998
          kl: 0.014927078852138188
          policy_loss: 0.0002993225637409422
          total_loss: -0.018372712325718667
          vf_explained_var: -0.6884844303131104
          vf_loss: 0.0006339438654120184
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 63
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,63,1495.78,63000,-0.08,0,-8,387.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-08_22-49-24
  done: false
  episode_len_mean: 386.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 166
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3742543617884317
          entropy_coeff: 0.009999999999999998
          kl: 0.01452635616995881
          policy_loss: -0.061899379330376786
          total_loss: -0.08154422856039471
          vf_explained_var: -0.9784031510353088
          vf_loss: 0.0008292621447859952
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,64,1518.85,64000,-0.08,0,-8,386.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-08_22-49-47
  done: false
  episode_len_mean: 385.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 169
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3774163431591457
          entropy_coeff: 0.009999999999999998
          kl: 0.010526984548939487
          policy_loss: -0.0631792938698911
          total_loss: -0.08409340445780092
          vf_explained_var: -1.0
          vf_loss: 0.0004914804180670115
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_restore: 65
  node_ip: 192.168.3.5
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,65,1542.04,65000,-0.08,0,-8,385.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-08_22-50-07
  done: false
  episode_len_mean: 385.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 171
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.162520572874281
          entropy_coeff: 0.009999999999999998
          kl: 0.01848185026695113
          policy_loss: -0.03198488420910305
          total_loss: -0.04608974589241876
          vf_explained_var: 0.16265185177326202
          vf_loss: 0.003361926046070746
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,66,1561.55,66000,-0.08,0,-8,385.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-08_22-50-29
  done: false
  episode_len_mean: 385.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 173
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.259703678554959
          entropy_coeff: 0.009999999999999998
          kl: 0.014718102937885488
          policy_loss: -0.10364382846487893
          total_loss: -0.12166309936179055
          vf_explained_var: -0.022144949063658714
          vf_loss: 0.001266194409173396
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_since_restore: 67
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,67,1583.36,67000,-0.08,0,-8,385.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-08_22-50-52
  done: false
  episode_len_mean: 383.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 176
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.235729063881768
          entropy_coeff: 0.009999999999999998
          kl: 0.02114641391444492
          policy_loss: -0.09006173014640809
          total_loss: -0.10602604115588797
          vf_explained_var: 0.33140355348587036
          vf_loss: 0.0016350379294534732
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_since_restore: 68
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,68,1607.1,68000,-0.08,0,-8,383.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-08_22-51-09
  done: false
  episode_len_mean: 384.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 178
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.000262775686052
          entropy_coeff: 0.009999999999999998
          kl: 0.016861841165903137
          policy_loss: -0.08967185960047774
          total_loss: -0.10145696442988184
          vf_explained_var: 0.39008331298828125
          vf_loss: 0.0025266511365771294
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_restore: 69
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,69,1623.13,69000,-0.08,0,-8,384.98




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-08_22-51-48
  done: false
  episode_len_mean: 384.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 181
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3536225159962973
          entropy_coeff: 0.009999999999999998
          kl: 0.018312262220349238
          policy_loss: -0.03850974299841457
          total_loss: -0.054480253987842134
          vf_explained_var: 0.12218069285154343
          vf_loss: 0.001385325816873875
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 70
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,70,1662.77,70000,-0.08,0,-8,384.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-08_22-52-12
  done: false
  episode_len_mean: 383.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 184
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.349228525161743
          entropy_coeff: 0.009999999999999998
          kl: 0.012909176500294012
          policy_loss: -0.0386127018266254
          total_loss: -0.05691788593928019
          vf_explained_var: -0.4340684711933136
          vf_loss: 0.0008302551086267663
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_restore: 71
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,71,1686.29,71000,-0.08,0,-8,383.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-08_22-52-36
  done: false
  episode_len_mean: 382.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 187
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2898122337129383
          entropy_coeff: 0.009999999999999998
          kl: 0.014653680493085493
          policy_loss: -0.06987868265973197
          total_loss: -0.08727319888356659
          vf_explained_var: -0.6785624623298645
          vf_loss: 0.0005579864755014165
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_restore: 72
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,72,1710.22,72000,-0.08,0,-8,382.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-08_22-52-57
  done: false
  episode_len_mean: 381.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 189
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3023396756913925
          entropy_coeff: 0.009999999999999998
          kl: 0.01047689456588728
          policy_loss: -0.07413499068675769
          total_loss: -0.09285674620833662
          vf_explained_var: -0.30364128947257996
          vf_loss: 0.0007656894323493664
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_restore: 73
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,73,1731.68,73000,-0.08,0,-8,381.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-08_22-53-20
  done: false
  episode_len_mean: 382.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 192
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.299963573614756
          entropy_coeff: 0.009999999999999998
          kl: 0.014066066378535231
          policy_loss: -0.054717164114117624
          total_loss: -0.07218841674427191
          vf_explained_var: -0.24947315454483032
          vf_loss: 0.0007810813585011702
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,74,1754.04,74000,-0.08,0,-8,382.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-08_22-53-42
  done: false
  episode_len_mean: 381.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 195
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.324997655550639
          entropy_coeff: 0.009999999999999998
          kl: 0.009833828881295217
          policy_loss: -0.08724319773415724
          total_loss: -0.10624122296770414
          vf_explained_var: -0.7916245460510254
          vf_loss: 0.0009330346090589754
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_restore: 75
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,75,1776.36,75000,-0.08,0,-8,381.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-08_22-54-06
  done: false
  episode_len_mean: 381.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 197
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3244151486290825
          entropy_coeff: 0.009999999999999998
          kl: 0.012552975169077465
          policy_loss: -0.07589478223688073
          total_loss: -0.0938447769317362
          vf_explained_var: -0.6586081385612488
          vf_loss: 0.0010575265617161576
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_restore: 76
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,76,1800.52,76000,-0.08,0,-8,381.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-08_22-54-31
  done: false
  episode_len_mean: 379.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 200
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.30889274014367
          entropy_coeff: 0.009999999999999998
          kl: 0.014820827790441863
          policy_loss: -0.09559950077285369
          total_loss: -0.1126344643947151
          vf_explained_var: -0.7227373123168945
          vf_loss: 0.0010519333364855912
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_restore: 77
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,77,1825.27,77000,-0.08,0,-8,379.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-08_22-54-51
  done: false
  episode_len_mean: 378.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 203
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2465634518199495
          entropy_coeff: 0.009999999999999998
          kl: 0.013223019210386155
          policy_loss: -0.08145595803442929
          total_loss: -0.09785354644474056
          vf_explained_var: 0.11118893325328827
          vf_loss: 0.0016052763889698933
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since_restore: 78
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,78,1845.85,78000,-0.08,0,-8,378.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-08_22-55-12
  done: false
  episode_len_mean: 377.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 205
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.262646026081509
          entropy_coeff: 0.009999999999999998
          kl: 0.013575509529158659
          policy_loss: -0.16922525564829508
          total_loss: -0.18553290590643884
          vf_explained_var: -0.5496432185173035
          vf_loss: 0.0017370729875336919
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_restore: 79
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,79,1866.34,79000,-0.08,0,-8,377.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-08_22-55-31
  done: false
  episode_len_mean: 379.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 208
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2137603375646804
          entropy_coeff: 0.009999999999999998
          kl: 0.017901699099185424
          policy_loss: -0.009455122757289144
          total_loss: -0.022417983495526844
          vf_explained_var: 0.05485346168279648
          vf_loss: 0.0031329187121526855
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,80,1885,80000,-0.08,0,-8,379.72




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-08_22-56-06
  done: false
  episode_len_mean: 380.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 210
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1325131800439623
          entropy_coeff: 0.009999999999999998
          kl: 0.017711682529737007
          policy_loss: -0.010036409729056887
          total_loss: -0.02325618945890003
          vf_explained_var: 0.006844132672995329
          vf_loss: 0.002127659199241963
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_restore: 81
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,81,1919.89,81000,-0.08,0,-8,380.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-08_22-56-23
  done: false
  episode_len_mean: 382.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 212
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0440158314175076
          entropy_coeff: 0.009999999999999998
          kl: 0.016079239081314464
          policy_loss: -0.04429701043085919
          total_loss: -0.05767431252946456
          vf_explained_var: -0.197899729013443
          vf_loss: 0.0016361106407000786
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_restore: 82
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,82,1937.6,82000,-0.08,0,-8,382.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-08_22-56-43
  done: false
  episode_len_mean: 383.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 214
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.968532841735416
          entropy_coeff: 0.009999999999999998
          kl: 0.015912946879113047
          policy_loss: -0.05212396172185739
          total_loss: -0.06491586636337969
          vf_explained_var: 0.2548357844352722
          vf_loss: 0.0015228037457240538
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore: 83
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,83,1957.46,83000,-0.08,0,-8,383.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-08_22-57-07
  done: false
  episode_len_mean: 380.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 217
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.136323384443919
          entropy_coeff: 0.009999999999999998
          kl: 0.021272016356491383
          policy_loss: -0.06089079880879985
          total_loss: -0.07324884798791674
          vf_explained_var: -0.007995199412107468
          vf_loss: 0.0018258790097509822
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore: 84
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,84,1980.91,84000,-0.08,0,-8,380.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-08_22-57-26
  done: false
  episode_len_mean: 380.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 219
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0554081506199307
          entropy_coeff: 0.009999999999999998
          kl: 0.01350382453632027
          policy_loss: -0.026304844001101124
          total_loss: -0.03913926954070727
          vf_explained_var: -0.3821013271808624
          vf_loss: 0.000883344748419606
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_restore: 85
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,85,2000.28,85000,-0.08,0,-8,380.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-08_22-57-44
  done: false
  episode_len_mean: 384.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 221
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6877509077390036
          entropy_coeff: 0.009999999999999998
          kl: 0.009129648970778526
          policy_loss: 0.012613187399175432
          total_loss: 0.0015393344892395866
          vf_explained_var: 0.059782251715660095
          vf_loss: 0.001181771804112941
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_restore: 86
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,86,2018.5,86000,-0.08,0,-8,384.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-08_22-58-09
  done: false
  episode_len_mean: 384.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 224
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.02806466155582
          entropy_coeff: 0.009999999999999998
          kl: 0.012558149039444721
          policy_loss: -0.08117484061254396
          total_loss: -0.0937836094862885
          vf_explained_var: -0.060790613293647766
          vf_loss: 0.0013143147972490019
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_restore: 87
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,87,2042.93,87000,-0.08,0,-8,384.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-08_22-58-26
  done: false
  episode_len_mean: 384.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 226
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.989216117064158
          entropy_coeff: 0.009999999999999998
          kl: 0.012275965139825158
          policy_loss: -0.03985252728064855
          total_loss: -0.0517083509100808
          vf_explained_var: -0.8187400102615356
          vf_loss: 0.0018216300218935229
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,88,2059.92,88000,-0.08,0,-8,384.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-08_22-58-47
  done: false
  episode_len_mean: 385.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 229
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1900615016619365
          entropy_coeff: 0.009999999999999998
          kl: 0.010667755351284648
          policy_loss: -0.08566512645532688
          total_loss: -0.10092959453662237
          vf_explained_var: -0.5605487823486328
          vf_loss: 0.0012355952483226753
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_restore: 89
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,89,2081.61,89000,0,0,0,385.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-08_22-59-06
  done: false
  episode_len_mean: 388.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 231
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.945875644683838
          entropy_coeff: 0.009999999999999998
          kl: 0.014647120655418103
          policy_loss: -0.08652784740552306
          total_loss: -0.09561740406271484
          vf_explained_var: -0.6541849970817566
          vf_loss: 0.0029540910036303103
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 90
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,90,2100.08,90000,0,0,0,388.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-08_22-59-24
  done: false
  episode_len_mean: 388.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 233
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.90602355533176
          entropy_coeff: 0.009999999999999998
          kl: 0.014528256045047537
          policy_loss: -0.13667444027960302
          total_loss: -0.14580507522655858
          vf_explained_var: -0.5289776921272278
          vf_loss: 0.002574669568437255
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_restore: 91
  node_ip: 192.168.3.5
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,91,2117.64,91000,0,0,0,388.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-08_22-59-40
  done: false
  episode_len_mean: 390.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 235
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1234439717398748
          entropy_coeff: 0.009999999999999998
          kl: 0.012404212664720483
          policy_loss: -0.06976213753223419
          total_loss: -0.08297720510098669
          vf_explained_var: -0.6527645587921143
          vf_loss: 0.0017397373251798046
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_since_restore: 92
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,92,2134.16,92000,0,0,0,390.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-08_23-00-02
  done: false
  episode_len_mean: 392.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 238
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1534653690126206
          entropy_coeff: 0.009999999999999998
          kl: 0.012165077666019602
          policy_loss: -0.096785420883033
          total_loss: -0.11050434878302945
          vf_explained_var: -0.05611984431743622
          vf_loss: 0.0016571527458533334
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_restore: 93
  node_ip: 192.168.3.5
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,93,2156.32,93000,0,0,0,392.8




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-08_23-00-43
  done: false
  episode_len_mean: 391.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 240
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2434266673194037
          entropy_coeff: 0.009999999999999998
          kl: 0.011137247041218462
          policy_loss: -0.0811996421466271
          total_loss: -0.09710079516387649
          vf_explained_var: -0.6532533168792725
          vf_loss: 0.0008948819200870477
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 94
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,94,2197.28,94000,0,0,0,391.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-08_23-01-05
  done: false
  episode_len_mean: 392.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 243
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9663493315378824
          entropy_coeff: 0.009999999999999998
          kl: 0.016200508654060246
          policy_loss: -0.07327524998949633
          total_loss: -0.08277041531271405
          vf_explained_var: -0.13778497278690338
          vf_loss: 0.0019668192599460277
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_restore: 95
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,95,2219.26,95000,0,0,0,392.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-08_23-01-30
  done: false
  episode_len_mean: 391.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 246
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1742872609032524
          entropy_coeff: 0.009999999999999998
          kl: 0.01293452021580819
          policy_loss: -0.02027172384162744
          total_loss: -0.03492855288916164
          vf_explained_var: -0.9099764227867126
          vf_loss: 0.0005379428359447048
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore: 96
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,96,2243.59,96000,0,0,0,391.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-08_23-01-48
  done: false
  episode_len_mean: 392.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 248
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.875862142774794
          entropy_coeff: 0.009999999999999998
          kl: 0.012689826149709343
          policy_loss: -0.09488005940284994
          total_loss: -0.10476464264922672
          vf_explained_var: -0.0387202650308609
          vf_loss: 0.002449814091151994
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_restore: 97
  node_ip: 192.168.3.5
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,97,2261.96,97000,0,0,0,392.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-08_23-02-10
  done: false
  episode_len_mean: 394.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 251
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.040427429146237
          entropy_coeff: 0.009999999999999998
          kl: 0.009993956959066767
          policy_loss: -0.09966759292615784
          total_loss: -0.11366588121487034
          vf_explained_var: -0.19488725066184998
          vf_loss: 0.0013465444369810736
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_restore: 98
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,98,2283.54,98000,0,0,0,394.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-08_23-02-27
  done: false
  episode_len_mean: 395.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 253
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.916324680381351
          entropy_coeff: 0.009999999999999998
          kl: 0.016092335884960392
          policy_loss: -0.01680586843027009
          total_loss: -0.024813458737399843
          vf_explained_var: 0.5941171050071716
          vf_loss: 0.0030089072831388976
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_restore: 99
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,99,2301.08,99000,0,0,0,395.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-08_23-02-47
  done: false
  episode_len_mean: 396.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 255
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2188984950383506
          entropy_coeff: 0.009999999999999998
          kl: 0.012628417047814637
          policy_loss: -0.025921131898131635
          total_loss: -0.031619049939844344
          vf_explained_var: -0.8429447412490845
          vf_loss: 0.010097929850841562
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,100,2321.25,100000,0,0,0,396.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-08_23-03-06
  done: false
  episode_len_mean: 398.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 258
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9938614659839207
          entropy_coeff: 0.009999999999999998
          kl: 0.011389517918629365
          policy_loss: 0.025167532223794194
          total_loss: 0.015814264367024104
          vf_explained_var: -0.1416812390089035
          vf_loss: 0.004819402160743873
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,101,2339.66,101000,0,0,0,398.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-08_23-03-30
  done: false
  episode_len_mean: 398.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 261
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1996815681457518
          entropy_coeff: 0.009999999999999998
          kl: 0.010497567000470553
          policy_loss: -0.09059639970461528
          total_loss: -0.10669282625118891
          vf_explained_var: -0.9082037210464478
          vf_loss: 0.0005859980928815073
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,102,2364.19,102000,0,0,0,398.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-08_23-03-52
  done: false
  episode_len_mean: 398.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 263
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1517100758022734
          entropy_coeff: 0.009999999999999998
          kl: 0.010799642710628988
          policy_loss: -0.10517976035674413
          total_loss: -0.12022087547100252
          vf_explained_var: -0.3872583210468292
          vf_loss: 0.0010086670220415626
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,103,2385.83,103000,0,0,0,398.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-08_23-04-14
  done: false
  episode_len_mean: 399.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 266
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1332071410285103
          entropy_coeff: 0.009999999999999998
          kl: 0.012072952876289457
          policy_loss: -0.03423029482364655
          total_loss: -0.04822095466984643
          vf_explained_var: -0.7292592525482178
          vf_loss: 0.0012294808932993976
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,104,2407.86,104000,0,0,0,399.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-08_23-04-37
  done: false
  episode_len_mean: 399.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 269
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0225290258725486
          entropy_coeff: 0.009999999999999998
          kl: 0.012576639837789037
          policy_loss: -0.0597364100938042
          total_loss: -0.07225688499295049
          vf_explained_var: -0.47089818120002747
          vf_loss: 0.0013378903340910458
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,105,2431.1,105000,0,0,0,399.44




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-08_23-05-18
  done: false
  episode_len_mean: 396.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 272
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.269061554802789
          entropy_coeff: 0.009999999999999998
          kl: 0.011750085814144424
          policy_loss: -0.03636095737003618
          total_loss: -0.052604872102124826
          vf_explained_var: -0.4095575213432312
          vf_loss: 0.0004982211960143306
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,106,2472.26,106000,0,0,0,396.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-08_23-05-43
  done: false
  episode_len_mean: 395.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 275
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2121858808729384
          entropy_coeff: 0.009999999999999998
          kl: 0.01037064690772124
          policy_loss: 0.018809855522380934
          total_loss: 0.002577398055129581
          vf_explained_var: -0.8149803876876831
          vf_loss: 0.0006392587480756143
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,107,2497.21,107000,0,0,0,395.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-08_23-06-05
  done: false
  episode_len_mean: 396.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 277
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9765753070513408
          entropy_coeff: 0.009999999999999998
          kl: 0.014974445982284588
          policy_loss: -0.12153938747942447
          total_loss: -0.13063787524071005
          vf_explained_var: -0.37565112113952637
          vf_loss: 0.0030864531620560837
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,108,2518.3,108000,0,0,0,396.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-08_23-06-25
  done: false
  episode_len_mean: 395.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 280
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0497716943422954
          entropy_coeff: 0.009999999999999998
          kl: 0.017251881522978903
          policy_loss: -0.0638225971824593
          total_loss: -0.0733367575953404
          vf_explained_var: -0.6517626643180847
          vf_loss: 0.0022497910980342163
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,109,2538.99,109000,0,0,0,395.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-08_23-06-49
  done: false
  episode_len_mean: 394.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 282
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2387998077604507
          entropy_coeff: 0.009999999999999998
          kl: 0.008444272160734827
          policy_loss: -0.04587526499397225
          total_loss: -0.06330246383117305
          vf_explained_var: -0.7386214137077332
          vf_loss: 0.0006858846247066847
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,110,2562.29,110000,0,0,0,394.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-08_23-07-11
  done: false
  episode_len_mean: 394.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 285
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.15925608476003
          entropy_coeff: 0.009999999999999998
          kl: 0.010512646749812053
          policy_loss: -0.0790982662079235
          total_loss: -0.09446407973559366
          vf_explained_var: -0.15123264491558075
          vf_loss: 0.0009047147740299503
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,111,2585.02,111000,0,0,0,394.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-08_23-07-31
  done: false
  episode_len_mean: 396.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 288
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0556757873959013
          entropy_coeff: 0.009999999999999998
          kl: 0.012585879094999116
          policy_loss: 0.00705648482673698
          total_loss: -0.005608311171332995
          vf_explained_var: -0.21520915627479553
          vf_loss: 0.0015203609803898467
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,112,2605.22,112000,0,0,0,396.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-08_23-07-56
  done: false
  episode_len_mean: 394.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 291
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2524465719858804
          entropy_coeff: 0.009999999999999998
          kl: 0.013324697247394034
          policy_loss: -0.04726184184352557
          total_loss: -0.06243873569700453
          vf_explained_var: -0.4443131387233734
          vf_loss: 0.0006019408979530756
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,113,2629.64,113000,0,0,0,394.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-08_23-08-19
  done: false
  episode_len_mean: 395.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 293
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2239755445056493
          entropy_coeff: 0.009999999999999998
          kl: 0.015042013222586616
          policy_loss: -0.10307284866770108
          total_loss: -0.11654376785995232
          vf_explained_var: -0.7694089412689209
          vf_loss: 0.0011538168037077412
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,114,2652.21,114000,0,0,0,395.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-08_23-08-41
  done: false
  episode_len_mean: 394.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 296
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0175758573744034
          entropy_coeff: 0.009999999999999998
          kl: 0.014495468123434366
          policy_loss: -0.06922422030733691
          total_loss: -0.08009611388875379
          vf_explained_var: -0.043599966913461685
          vf_loss: 0.0019655326769376794
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,115,2674.85,115000,0,0,0,394.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-08_23-09-05
  done: false
  episode_len_mean: 394.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 299
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.872207760810852
          entropy_coeff: 0.009999999999999998
          kl: 0.013218433954809051
          policy_loss: -0.03200850205288993
          total_loss: -0.041931453636950916
          vf_explained_var: -0.2534123957157135
          vf_loss: 0.002107294690924593
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,116,2698.2,116000,0,0,0,394.35




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-08_23-09-46
  done: false
  episode_len_mean: 394.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 302
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7239930550257365
          entropy_coeff: 0.009999999999999998
          kl: 0.015230076013861539
          policy_loss: -0.028015646297070713
          total_loss: -0.03582853236132198
          vf_explained_var: -0.38605690002441406
          vf_loss: 0.0017168194896334575
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,117,2739.52,117000,0,0,0,394.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-08_23-10-08
  done: false
  episode_len_mean: 393.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 305
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.831302687856886
          entropy_coeff: 0.009999999999999998
          kl: 0.01127742399706579
          policy_loss: -0.0801909953976671
          total_loss: -0.09113572815226184
          vf_explained_var: -0.5459491610527039
          vf_loss: 0.0016591005395942679
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,118,2761.83,118000,0,0,0,393.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-08_23-10-33
  done: false
  episode_len_mean: 389.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 308
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0377213650279575
          entropy_coeff: 0.009999999999999998
          kl: 0.012281207250555207
          policy_loss: -0.07096292914615737
          total_loss: -0.08384695040682952
          vf_explained_var: -0.4401070475578308
          vf_loss: 0.001275828926009126
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,119,2786.4,119000,0,0,0,389.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-08_23-10-56
  done: false
  episode_len_mean: 385.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 311
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8403345982233683
          entropy_coeff: 0.009999999999999998
          kl: 0.011175974735228895
          policy_loss: 0.0143482295382354
          total_loss: 0.002801072204278575
          vf_explained_var: -0.3721514344215393
          vf_loss: 0.0011983522627916601
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,120,2810.04,120000,0,0,0,385.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-08_23-11-22
  done: false
  episode_len_mean: 380.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 314
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2725888305240207
          entropy_coeff: 0.009999999999999998
          kl: 0.010072847948750675
          policy_loss: -0.04137133748994933
          total_loss: -0.04749004638029469
          vf_explained_var: -0.1934877634048462
          vf_loss: 0.0015078018891573365
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,121,2835.33,121000,0,0,0,380.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-08_23-11-46
  done: false
  episode_len_mean: 380.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 317
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.043769583437178
          entropy_coeff: 0.009999999999999998
          kl: 0.01309650678558564
          policy_loss: -0.07775579893754589
          total_loss: -0.09044041877819432
          vf_explained_var: -0.4208114743232727
          vf_loss: 0.001122968086403691
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,122,2859.33,122000,0,0,0,380.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-08_23-12-11
  done: false
  episode_len_mean: 376.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 320
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.245028485192193
          entropy_coeff: 0.009999999999999998
          kl: 0.014374335681834162
          policy_loss: -0.08939994343866904
          total_loss: -0.10415366890115871
          vf_explained_var: -0.8785821795463562
          vf_loss: 0.0004195490806725704
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,123,2884.78,123000,0,0,0,376.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-08_23-12-37
  done: false
  episode_len_mean: 373.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 323
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.660281687312656
          entropy_coeff: 0.009999999999999998
          kl: 0.010990167807805819
          policy_loss: -0.02203156914975908
          total_loss: -0.03199408170249727
          vf_explained_var: -0.2163001149892807
          vf_loss: 0.0010765307207798792
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,124,2910.64,124000,0,0,0,373.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-08_23-13-01
  done: false
  episode_len_mean: 371.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 326
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.211394829220242
          entropy_coeff: 0.009999999999999998
          kl: 0.012653230882224815
          policy_loss: -0.05422691893246439
          total_loss: -0.0693281843430466
          vf_explained_var: -0.9831116795539856
          vf_loss: 0.0006069851680270707
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,125,2934.93,125000,0,0,0,371.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-08_23-13-27
  done: false
  episode_len_mean: 369.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 329
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8665734397040472
          entropy_coeff: 0.009999999999999998
          kl: 0.016477663312632392
          policy_loss: -0.009579702963431675
          total_loss: -0.018688582215044235
          vf_explained_var: -0.7358296513557434
          vf_loss: 0.0012150366114737053
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,126,2960.08,126000,0,0,0,369.14




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-08_23-14-09
  done: false
  episode_len_mean: 364.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 332
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.239741656515333
          entropy_coeff: 0.009999999999999998
          kl: 0.013795654510252744
          policy_loss: -0.07273837501804034
          total_loss: -0.08757312219176028
          vf_explained_var: -0.9604089260101318
          vf_loss: 0.0005786161229480058
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,127,3002.24,127000,0,0,0,364.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-08_23-14-31
  done: false
  episode_len_mean: 360.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 335
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0888241291046143
          entropy_coeff: 0.009999999999999998
          kl: 0.011302204007664797
          policy_loss: -0.1273831910557217
          total_loss: -0.1413718303044637
          vf_explained_var: -0.7902052998542786
          vf_loss: 0.001177860462727646
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,128,3024.62,128000,0,0,0,360.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-08_23-14-56
  done: false
  episode_len_mean: 358.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 338
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0727776646614076
          entropy_coeff: 0.009999999999999998
          kl: 0.012842727168161617
          policy_loss: -0.07229482034842173
          total_loss: -0.08591681313183573
          vf_explained_var: -0.9435417056083679
          vf_loss: 0.0006041530788125885
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,129,3049.18,129000,0,0,0,358.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-08_23-15-20
  done: false
  episode_len_mean: 358.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 341
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8675182183583579
          entropy_coeff: 0.009999999999999998
          kl: 0.012340248164265195
          policy_loss: -0.046084472661217055
          total_loss: -0.05706049671603574
          vf_explained_var: -0.5855135917663574
          vf_loss: 0.001451906481330904
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,130,3073.08,130000,0,0,0,358.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-08_23-15-43
  done: false
  episode_len_mean: 358.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 344
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.273771654234992
          entropy_coeff: 0.009999999999999998
          kl: 0.009687102212392417
          policy_loss: -0.04928523374514447
          total_loss: -0.06668323083884187
          vf_explained_var: -1.0
          vf_loss: 0.00043562477237881267
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_ip: 192.168.3.5
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,131,3095.9,131000,0,0,0,358.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-08_23-16-05
  done: false
  episode_len_mean: 358.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 346
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1718422452608745
          entropy_coeff: 0.009999999999999998
          kl: 0.007600146655582988
          policy_loss: 0.018804535932011073
          total_loss: 0.0013864114880561829
          vf_explained_var: -0.8668370842933655
          vf_loss: 0.0004527222892243622
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,132,3118.55,132000,0,0,0,358.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-08_23-16-30
  done: false
  episode_len_mean: 356.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 349
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0629893806245594
          entropy_coeff: 0.009999999999999998
          kl: 0.015304354593839836
          policy_loss: -0.10205989219248295
          total_loss: -0.11355269439518452
          vf_explained_var: -0.2836264669895172
          vf_loss: 0.0013892636666747017
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,133,3143.42,133000,0,0,0,356.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-08_23-16-55
  done: false
  episode_len_mean: 353.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 352
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0776441547605726
          entropy_coeff: 0.009999999999999998
          kl: 0.0087493021244723
          policy_loss: -0.05935653232865863
          total_loss: -0.07521145604550838
          vf_explained_var: -0.49706417322158813
          vf_loss: 0.000492180716052341
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,134,3167.96,134000,0,0,0,353.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-08_23-17-19
  done: false
  episode_len_mean: 350.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 355
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0001428339216445
          entropy_coeff: 0.009999999999999998
          kl: 0.011300161530384158
          policy_loss: -0.06426725693874889
          total_loss: -0.07767029636436039
          vf_explained_var: -0.5436685085296631
          vf_loss: 0.000877681885807154
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,135,3191.89,135000,0,0,0,350.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-08_23-17-42
  done: false
  episode_len_mean: 347.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 358
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0627296845118206
          entropy_coeff: 0.009999999999999998
          kl: 0.018143161967880586
          policy_loss: -0.06361365918484
          total_loss: -0.0743502527061436
          vf_explained_var: -0.6961715817451477
          vf_loss: 0.0007057287167602529
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,136,3215.68,136000,0,0,0,347.8




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-08_23-18-24
  done: false
  episode_len_mean: 348.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 361
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.031454188293881
          entropy_coeff: 0.009999999999999998
          kl: 0.012041376752722677
          policy_loss: -0.01861624088552263
          total_loss: -0.03198476526886225
          vf_explained_var: -0.976683497428894
          vf_loss: 0.0008500698184232331
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,137,3257.19,137000,0,0,0,348.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-08_23-18-48
  done: false
  episode_len_mean: 346.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 364
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.092327292760213
          entropy_coeff: 0.009999999999999998
          kl: 0.012597775263944714
          policy_loss: -0.07693869205605652
          total_loss: -0.0909030083566904
          vf_explained_var: -1.0
          vf_loss: 0.0005813305977628463
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  node_ip: 192.168.3.5
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,138,3281.13,138000,0,0,0,346.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-08_23-19-12
  done: false
  episode_len_mean: 345.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 367
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0003366894192167
          entropy_coeff: 0.009999999999999998
          kl: 0.014540283118576053
          policy_loss: -0.039620974411567055
          total_loss: -0.0513872342184186
          vf_explained_var: -0.7924425601959229
          vf_loss: 0.0008760884090507817
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,139,3305.02,139000,0,0,0,345.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-08_23-19-36
  done: false
  episode_len_mean: 346.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 370
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9543923748864067
          entropy_coeff: 0.009999999999999998
          kl: 0.011215961649820767
          policy_loss: -0.0423765216436651
          total_loss: -0.05528161482264598
          vf_explained_var: 0.04584354907274246
          vf_loss: 0.0009607484318419463
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,140,3328.78,140000,0,0,0,346.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-08_23-20-00
  done: false
  episode_len_mean: 345.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 373
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.149433403544956
          entropy_coeff: 0.009999999999999998
          kl: 0.010958009174920038
          policy_loss: -0.07849449457393752
          total_loss: -0.0940269453657998
          vf_explained_var: -0.6526385545730591
          vf_loss: 0.0004143910264247097
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,141,3353.58,141000,0,0,0,345.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-08_23-20-24
  done: false
  episode_len_mean: 345.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 376
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.074198234081268
          entropy_coeff: 0.009999999999999998
          kl: 0.012152875522440926
          policy_loss: -0.07162317399763399
          total_loss: -0.08570199445303943
          vf_explained_var: -0.9522650837898254
          vf_loss: 0.0005107670029651166
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,142,3377.33,142000,0,0,0,345.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-08_23-20-50
  done: false
  episode_len_mean: 342.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 379
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.185889320903354
          entropy_coeff: 0.009999999999999998
          kl: 0.008383775146858143
          policy_loss: -0.10173516710185343
          total_loss: -0.11895453652573956
          vf_explained_var: -1.0
          vf_loss: 0.00039523598841494984
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  node_ip: 192.168.3.5
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,143,3403.37,143000,0,0,0,342.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-08_23-21-14
  done: false
  episode_len_mean: 341.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 382
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.120745735698276
          entropy_coeff: 0.009999999999999998
          kl: 0.008484741169738106
          policy_loss: -0.008335856596628825
          total_loss: -0.02471697914103667
          vf_explained_var: -0.7656590938568115
          vf_loss: 0.0005309321338750629
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 144
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,144,3427.35,144000,0,0,0,341.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-08_23-21-41
  done: false
  episode_len_mean: 340.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 385
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.242173851860894
          entropy_coeff: 0.009999999999999998
          kl: 0.013747405948499195
          policy_loss: -0.019158298605018192
          total_loss: -0.033963662075499695
          vf_explained_var: -0.8832048177719116
          vf_loss: 0.0006567497708601877
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_since_restore: 145
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,145,3453.75,145000,0,0,0,340.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-08_23-22-07
  done: false
  episode_len_mean: 337.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 389
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.071095155345069
          entropy_coeff: 0.009999999999999998
          kl: 0.018398538062298877
          policy_loss: -0.07630377705726359
          total_loss: -0.08705590313507451
          vf_explained_var: -0.28008413314819336
          vf_loss: 0.0006445616018027067
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_since_restore: 146
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,146,3479.8,146000,0,0,0,337.42




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-08_23-22-50
  done: false
  episode_len_mean: 335.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 392
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.109456585513221
          entropy_coeff: 0.009999999999999998
          kl: 0.007674212114247325
          policy_loss: -0.042203682598968346
          total_loss: -0.05895053484580583
          vf_explained_var: -1.0
          vf_loss: 0.0004626433609195778
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_since_restore: 147
  node_ip: 192.168.3.5
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,147,3522.89,147000,0,0,0,335.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-08_23-23-16
  done: false
  episode_len_mean: 334.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 395
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.125132958094279
          entropy_coeff: 0.009999999999999998
          kl: 0.010075205728313285
          policy_loss: -0.0038068118194739026
          total_loss: -0.019356471921006837
          vf_explained_var: -0.3867030143737793
          vf_loss: 0.0006010969402268529
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 148
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,148,3548.53,148000,0,0,0,334.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-08_23-23-41
  done: false
  episode_len_mean: 332.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 398
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.160325762960646
          entropy_coeff: 0.009999999999999998
          kl: 0.009846698879898478
          policy_loss: -0.008837690566562945
          total_loss: -0.025059330815242396
          vf_explained_var: -0.6934008002281189
          vf_loss: 0.00039672657148912547
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_since_restore: 149
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,149,3574.36,149000,0,0,0,332.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-08_23-24-07
  done: false
  episode_len_mean: 332.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 401
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.176967085732354
          entropy_coeff: 0.009999999999999998
          kl: 0.011050155219717354
          policy_loss: -0.06936778711775939
          total_loss: -0.08530529758168591
          vf_explained_var: -0.9915843605995178
          vf_loss: 0.0002380169699285438
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 150
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,150,3599.82,150000,0,0,0,332.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-08_23-24-32
  done: false
  episode_len_mean: 331.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 404
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.129661644829644
          entropy_coeff: 0.009999999999999998
          kl: 0.009009001175006635
          policy_loss: -0.010620583117836051
          total_loss: -0.026857507083978917
          vf_explained_var: -0.951460599899292
          vf_loss: 0.0004988822157934515
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_since_restore: 151
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,151,3625.35,151000,0,0,0,331.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-08_23-25-00
  done: false
  episode_len_mean: 329.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 408
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8025411089261374
          entropy_coeff: 0.009999999999999998
          kl: 0.007838826039670968
          policy_loss: -0.01828382545047336
          total_loss: -0.031601103809144764
          vf_explained_var: -0.0465681217610836
          vf_loss: 0.0007397244141126672
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 152
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,152,3653.34,152000,0,0,0,329.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-08_23-25-27
  done: false
  episode_len_mean: 328.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 411
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.228970750172933
          entropy_coeff: 0.009999999999999998
          kl: 0.012935136807075646
          policy_loss: -0.03232637991507848
          total_loss: -0.04769974057045248
          vf_explained_var: -0.8381566405296326
          vf_loss: 0.00036793097598193626
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_since_restore: 153
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,153,3679.84,153000,0,0,0,328.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-08_23-25-53
  done: false
  episode_len_mean: 327.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 415
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.130254497792986
          entropy_coeff: 0.009999999999999998
          kl: 0.010937613573659029
          policy_loss: -0.006836389905462662
          total_loss: -0.022221657033595774
          vf_explained_var: -1.0
          vf_loss: 0.00038010900769020535
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_since_restore: 154
  node_ip: 192.168.3.5
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,154,3706.3,154000,0,0,0,327.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-08_23-26-19
  done: false
  episode_len_mean: 327.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 418
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.188335484928555
          entropy_coeff: 0.009999999999999998
          kl: 0.012949767652416986
          policy_loss: -0.08984893038868905
          total_loss: -0.10452528289622731
          vf_explained_var: -0.3727972209453583
          vf_loss: 0.0006511840199689484
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_since_restore: 155
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,155,3731.6,155000,0,0,0,327.66




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-08_23-27-03
  done: false
  episode_len_mean: 327.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 421
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1267157740063136
          entropy_coeff: 0.009999999999999998
          kl: 0.011758236064850772
          policy_loss: -0.07593049543599288
          total_loss: -0.09048297678430875
          vf_explained_var: -0.3521308898925781
          vf_loss: 0.000762070110714477
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_since_restore: 156
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,156,3776.17,156000,0,0,0,327.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-08_23-27-29
  done: false
  episode_len_mean: 326.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 424
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.222752499580383
          entropy_coeff: 0.009999999999999998
          kl: 0.011976529063826588
          policy_loss: -0.028327179555263784
          total_loss: -0.04393996364540524
          vf_explained_var: -0.5038268566131592
          vf_loss: 0.0005516215657634247
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations_since_restore: 157
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,157,3802.06,157000,0,0,0,326.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-08_23-27-56
  done: false
  episode_len_mean: 325.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 428
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.137342784139845
          entropy_coeff: 0.009999999999999998
          kl: 0.014653071988779291
          policy_loss: -0.08737322729494837
          total_loss: -0.1009672066817681
          vf_explained_var: -0.9978657960891724
          vf_loss: 0.000361329147805615
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations_since_restore: 158
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,158,3829.14,158000,0,0,0,325.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-08_23-28-22
  done: false
  episode_len_mean: 325.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 431
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2492742247051662
          entropy_coeff: 0.009999999999999998
          kl: 0.010748807356521261
          policy_loss: -0.07796781187256177
          total_loss: -0.09448159461220106
          vf_explained_var: -0.9836753010749817
          vf_loss: 0.0005373743776645925
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_since_restore: 159
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,159,3854.74,159000,0,0,0,325.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-08_23-28-49
  done: false
  episode_len_mean: 324.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 434
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.137828312979804
          entropy_coeff: 0.009999999999999998
          kl: 0.009572598978382826
          policy_loss: -0.06379181243893173
          total_loss: -0.07995775014989906
          vf_explained_var: -1.0
          vf_loss: 0.0003662170798634179
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 160
  node_ip: 192.168.3.5
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,160,3881.65,160000,0,0,0,324.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-08_23-29-15
  done: false
  episode_len_mean: 322.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 437
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1675423834058973
          entropy_coeff: 0.009999999999999998
          kl: 0.008430285369341097
          policy_loss: -0.06142146945413616
          total_loss: -0.07843511499878433
          vf_explained_var: -1.0
          vf_loss: 0.00039394490983492384
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_since_restore: 161
  node_ip: 192.168.3.5
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,161,3907.46,161000,0,0,0,322.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-08_23-29-41
  done: false
  episode_len_mean: 321.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 441
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9942681074142456
          entropy_coeff: 0.009999999999999998
          kl: 0.013563082789207826
          policy_loss: -0.05484459329810407
          total_loss: -0.06713649976170726
          vf_explained_var: -0.77294921875
          vf_loss: 0.0007844655987961839
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_since_restore: 162
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,162,3934.05,162000,0,0,0,321.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-08_23-30-06
  done: false
  episode_len_mean: 320.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 444
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.156164598464966
          entropy_coeff: 0.009999999999999998
          kl: 0.013620269928175145
          policy_loss: -0.07069862687753306
          total_loss: -0.08486365502079328
          vf_explained_var: -1.0
          vf_loss: 0.0005013554984341479
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_since_restore: 163
  node_ip: 192.168.3.5
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,163,3958.38,163000,0,0,0,320.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-08_23-30-31
  done: false
  episode_len_mean: 318.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 447
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.127608331044515
          entropy_coeff: 0.009999999999999998
          kl: 0.009059542496914588
          policy_loss: -0.019229722768068315
          total_loss: -0.03553175443990363
          vf_explained_var: -1.0
          vf_loss: 0.0003876585655638741
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_since_restore: 164
  node_ip: 192.168.3.5
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,164,3983.67,164000,0,0,0,318.53




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-08_23-31-13
  done: false
  episode_len_mean: 318.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 450
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1407299068239
          entropy_coeff: 0.009999999999999998
          kl: 0.010209962995320716
          policy_loss: -0.08753097375027008
          total_loss: -0.103313976753917
          vf_explained_var: -1.0
          vf_loss: 0.00045550237215744954
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterations_since_restore: 165
  node_ip: 192.168.3.5
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,165,4026.04,165000,0,0,0,318.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-08_23-31-37
  done: false
  episode_len_mean: 318.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 453
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1287573602464462
          entropy_coeff: 0.009999999999999998
          kl: 0.010266752506364617
          policy_loss: -0.10067419666383001
          total_loss: -0.11610440247588688
          vf_explained_var: -0.6353879570960999
          vf_loss: 0.0006598242927187433
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 166
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,166,4049.53,166000,0,0,0,318.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-08_23-32-00
  done: false
  episode_len_mean: 318.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 456
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1491930643717447
          entropy_coeff: 0.009999999999999998
          kl: 0.010427817622446886
          policy_loss: -0.03729295397384299
          total_loss: -0.05271793020268281
          vf_explained_var: -0.7999922037124634
          vf_loss: 0.0007878697706877978
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations_since_restore: 167
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,167,4072.98,167000,0,0,0,318.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-08_23-32-24
  done: false
  episode_len_mean: 319.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 458
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.108713510301378
          entropy_coeff: 0.009999999999999998
          kl: 0.012737452871526955
          policy_loss: -0.06154527312351598
          total_loss: -0.07567012262427145
          vf_explained_var: -1.0
          vf_loss: 0.0005139493383467198
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 168
  node_ip: 192.168.3.5
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,168,4096.52,168000,0,0,0,319.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-08_23-32-48
  done: false
  episode_len_mean: 318.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 461
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.111484400431315
          entropy_coeff: 0.009999999999999998
          kl: 0.011500304199372646
          policy_loss: -0.08554246715373463
          total_loss: -0.10042602759268549
          vf_explained_var: -1.0
          vf_loss: 0.0004092551447683945
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations_since_restore: 169
  node_ip: 192.168.3.5
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,169,4120.15,169000,0,0,0,318.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-08_23-33-11
  done: false
  episode_len_mean: 319.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 464
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0450782113605075
          entropy_coeff: 0.009999999999999998
          kl: 0.01079400435233059
          policy_loss: -0.018182615232136514
          total_loss: -0.032307158017324075
          vf_explained_var: -0.8796015381813049
          vf_loss: 0.0008617746672825888
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 170
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,170,4143.91,170000,0,0,0,319.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-08_23-33-35
  done: false
  episode_len_mean: 319.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 467
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0303448783026803
          entropy_coeff: 0.009999999999999998
          kl: 0.008993874144068995
          policy_loss: -0.07787966146651241
          total_loss: -0.09322124489893516
          vf_explained_var: -1.0
          vf_loss: 0.0004087150300620124
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_since_restore: 171
  node_ip: 192.168.3.5
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,171,4167.36,171000,0,0,0,319.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-08_23-34-01
  done: false
  episode_len_mean: 318.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 470
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7477539393636916
          entropy_coeff: 0.009999999999999998
          kl: 0.010895750635984629
          policy_loss: 0.0592180702680101
          total_loss: 0.04784127682861355
          vf_explained_var: -0.4710247814655304
          vf_loss: 0.0005847707292155569
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 172
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,172,4193.48,172000,0,0,0,318.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-08_23-34-25
  done: false
  episode_len_mean: 319.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 473
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0360214524798925
          entropy_coeff: 0.009999999999999998
          kl: 0.01736433605342559
          policy_loss: -0.02982286187923617
          total_loss: -0.040829659812152384
          vf_explained_var: -0.657221794128418
          vf_loss: 0.000562718893504805
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterations_since_restore: 173
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,173,4217.53,173000,0,0,0,319.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-08_23-34-47
  done: false
  episode_len_mean: 320.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 476
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9325995153850979
          entropy_coeff: 0.009999999999999998
          kl: 0.013077340244461604
          policy_loss: -0.08620312506746915
          total_loss: -0.09734486286631888
          vf_explained_var: -0.5454381108283997
          vf_loss: 0.0015638552276262393
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_since_restore: 174
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,174,4239.25,174000,0,0,0,320.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-08_23-35-12
  done: false
  episode_len_mean: 320.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 479
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.784395260281033
          entropy_coeff: 0.009999999999999998
          kl: 0.015583781576903852
          policy_loss: -0.025401178664631315
          total_loss: -0.03186219562258985
          vf_explained_var: -0.1497546285390854
          vf_loss: 0.0034936478975901588
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterations_since_restore: 175
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,175,4264.34,175000,0,0,0,320.5




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-08_23-35-59
  done: false
  episode_len_mean: 319.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 482
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.056590029928419
          entropy_coeff: 0.009999999999999998
          kl: 0.01601880373549732
          policy_loss: -0.06454461212787363
          total_loss: -0.07543310626513428
          vf_explained_var: -0.9121801853179932
          vf_loss: 0.0015678855626093638
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 176
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,176,4311.69,176000,0,0,0,319.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-08_23-36-27
  done: false
  episode_len_mean: 319.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 485
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0190119915538363
          entropy_coeff: 0.009999999999999998
          kl: 0.008979621270644481
          policy_loss: 0.07048414217101204
          total_loss: 0.05559999284644922
          vf_explained_var: -0.6600175499916077
          vf_loss: 0.0007600376259587292
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iterations_since_restore: 177
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,177,4339.28,177000,0,0,0,319.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-08_23-36-52
  done: false
  episode_len_mean: 321.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 488
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.051353869173262
          entropy_coeff: 0.009999999999999998
          kl: 0.012513984116372652
          policy_loss: -0.027446228596899245
          total_loss: -0.04015772177113427
          vf_explained_var: -0.4219101071357727
          vf_loss: 0.0014668387897674821
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations_since_restore: 178
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,178,4364.25,178000,0,0,0,321.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-08_23-37-17
  done: false
  episode_len_mean: 322.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 491
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.058898615837097
          entropy_coeff: 0.009999999999999998
          kl: 0.00651713766046574
          policy_loss: -0.0631789881322119
          total_loss: -0.07978983496626219
          vf_explained_var: -0.6399904489517212
          vf_loss: 0.0006788386584958061
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iterations_since_restore: 179
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,179,4389.17,179000,0,0,0,322.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-08_23-37-44
  done: false
  episode_len_mean: 323.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 494
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9401443521181743
          entropy_coeff: 0.009999999999999998
          kl: 0.035527124156893325
          policy_loss: 0.038802219099468654
          total_loss: 0.04010238183869256
          vf_explained_var: -0.2528732120990753
          vf_loss: 0.00271600059285346
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 180
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,180,4416.29,180000,0,0,0,323.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-08_23-38-08
  done: false
  episode_len_mean: 324.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 496
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9905275662740072
          entropy_coeff: 0.009999999999999998
          kl: 0.00800041750334113
          policy_loss: -0.05821209623374873
          total_loss: -0.07091252151876688
          vf_explained_var: -0.561565637588501
          vf_loss: 0.0011295341679619418
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 181
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,181,4440.12,181000,0,0,0,324.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-08_23-38-37
  done: false
  episode_len_mean: 324.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 500
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.1162966277864244
          entropy_coeff: 0.009999999999999998
          kl: 0.01276231373496941
          policy_loss: -0.08798101667521728
          total_loss: -0.0989291469152603
          vf_explained_var: -1.0
          vf_loss: 0.0005234530991098534
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_since_restore: 182
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,182,4469.33,182000,0,0,0,324.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-08_23-39-03
  done: false
  episode_len_mean: 325.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 503
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.960196896394094
          entropy_coeff: 0.009999999999999998
          kl: 0.009906922885797555
          policy_loss: -0.12087116638819377
          total_loss: -0.1323008292251163
          vf_explained_var: -1.0
          vf_loss: 0.0006492363838737624
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iterations_since_restore: 183
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,183,4495.75,183000,0,0,0,325.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-08_23-39-29
  done: false
  episode_len_mean: 326.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 505
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6518300453821817
          entropy_coeff: 0.009999999999999998
          kl: 0.009810925552410312
          policy_loss: -0.02133032204583287
          total_loss: -0.028227329005797705
          vf_explained_var: -0.5603170394897461
          vf_loss: 0.002171120764170256
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 184
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,184,4520.83,184000,0,0,0,326.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-08_23-39-56
  done: false
  episode_len_mean: 328.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 508
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.0211633417341446
          entropy_coeff: 0.009999999999999998
          kl: 0.012586503236237294
          policy_loss: -0.046293538581166
          total_loss: -0.05641338992863894
          vf_explained_var: -0.5711321234703064
          vf_loss: 0.0005339075280870829
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iterations_since_restore: 185
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,185,4548.73,185000,0,0,0,328.41




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-08_23-40-37
  done: false
  episode_len_mean: 330.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 511
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9851430959171719
          entropy_coeff: 0.009999999999999998
          kl: 0.008083043456126987
          policy_loss: -0.008993819852670034
          total_loss: -0.020848413474029964
          vf_explained_var: -0.0277523472905159
          vf_loss: 0.0018587767573383948
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_since_restore: 186
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,186,4588.83,186000,0,0,0,330.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-08_23-40-59
  done: false
  episode_len_mean: 332.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 514
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9048724161254036
          entropy_coeff: 0.009999999999999998
          kl: 0.012664855342496997
          policy_loss: -0.06542659720612896
          total_loss: -0.07330161904295286
          vf_explained_var: -0.43159839510917664
          vf_loss: 0.0015563251715826078
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_since_restore: 187
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,187,4611.47,187000,0,0,0,332.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-08_23-41-19
  done: false
  episode_len_mean: 334.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 516
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.94080185757743
          entropy_coeff: 0.009999999999999998
          kl: 0.010654712521459015
          policy_loss: -0.09731223533550898
          total_loss: -0.10611049615674549
          vf_explained_var: 0.007856025360524654
          vf_loss: 0.002518838082970534
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterations_since_restore: 188
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,188,4631.35,188000,0,0,0,334.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-08_23-41-44
  done: false
  episode_len_mean: 335.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 519
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9397402962048849
          entropy_coeff: 0.009999999999999998
          kl: 0.01377083856122003
          policy_loss: -0.05054608972536193
          total_loss: -0.05864458845721351
          vf_explained_var: -0.5111334323883057
          vf_loss: 0.0008416752968009355
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterations_since_restore: 189
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,189,4656.55,189000,0,0,0,335.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-08_23-42-10
  done: false
  episode_len_mean: 336.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 522
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.1227061298158434
          entropy_coeff: 0.009999999999999998
          kl: 0.013843336503380677
          policy_loss: -0.05140634644776583
          total_loss: -0.06147020839982563
          vf_explained_var: -0.9841321110725403
          vf_loss: 0.000650915781281785
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 190
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,190,4681.72,190000,0,0,0,336.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-08_23-42-32
  done: false
  episode_len_mean: 338.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 525
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9073238107893202
          entropy_coeff: 0.009999999999999998
          kl: 0.010131824592426468
          policy_loss: -0.07261681076553132
          total_loss: -0.08214752260181639
          vf_explained_var: -0.43622174859046936
          vf_loss: 0.0018486704286058537
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  iterations_since_restore: 191
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,191,4704.55,191000,0,0,0,338.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-08_23-42-53
  done: false
  episode_len_mean: 340.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 527
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7153891682624818
          entropy_coeff: 0.009999999999999998
          kl: 0.012253897291266549
          policy_loss: -0.0347454272210598
          total_loss: -0.04141288813617494
          vf_explained_var: 0.012934229336678982
          vf_loss: 0.0011811284186680699
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 192
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,192,4725.39,192000,0,0,0,340.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-08_23-43-17
  done: false
  episode_len_mean: 342.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 530
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8820816662576463
          entropy_coeff: 0.009999999999999998
          kl: 0.01044161447739719
          policy_loss: -0.07510425616055727
          total_loss: -0.08535061130921046
          vf_explained_var: -0.43758976459503174
          vf_loss: 0.0006453615044140153
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  iterations_since_restore: 193
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,193,4749.12,193000,0,0,0,342.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-08_23-43-40
  done: false
  episode_len_mean: 344.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 533
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9806990120146009
          entropy_coeff: 0.009999999999999998
          kl: 0.00964220721979354
          policy_loss: -0.05157805677089426
          total_loss: -0.06229566145274374
          vf_explained_var: -1.0
          vf_loss: 0.0017673333692881795
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterations_since_restore: 194
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,194,4772.19,194000,0,0,0,344.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-08_23-44-05
  done: false
  episode_len_mean: 345.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 535
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.0034528295199077
          entropy_coeff: 0.009999999999999998
          kl: 0.00857029689139246
          policy_loss: -0.08656517623199357
          total_loss: -0.09937843142284288
          vf_explained_var: -0.8946434259414673
          vf_loss: 0.000713204026235164
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  iterations_since_restore: 195
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,195,4796.93,195000,0,0,0,345.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-08_23-44-30
  done: false
  episode_len_mean: 346.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 538
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.860692987177107
          entropy_coeff: 0.009999999999999998
          kl: 0.012174908099553723
          policy_loss: -0.03271348484688335
          total_loss: -0.04088582636581527
          vf_explained_var: -0.7271096706390381
          vf_loss: 0.00118926515602248
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 196
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,196,4822.16,196000,0,0,0,346.08




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-08_23-45-09
  done: false
  episode_len_mean: 347.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 541
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9533176475101046
          entropy_coeff: 0.009999999999999998
          kl: 0.007248660425339606
          policy_loss: 0.0011718730959627364
          total_loss: -0.011868100313262808
          vf_explained_var: -1.0
          vf_loss: 0.000988751607817701
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  iterations_since_restore: 197
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,197,4861.48,197000,0,0,0,347.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-08_23-45-28
  done: false
  episode_len_mean: 350.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 543
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.136431680785285
          entropy_coeff: 0.009999999999999998
          kl: 0.010025742761149885
          policy_loss: -0.02414824768073029
          total_loss: -0.02730950036396583
          vf_explained_var: -0.038522206246852875
          vf_loss: 0.0005897623784322705
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iterations_since_restore: 198
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,198,4880.13,198000,0,0,0,350.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-08_23-45-44
  done: false
  episode_len_mean: 354.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 545
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7734121680259705
          entropy_coeff: 0.009999999999999998
          kl: 0.009556703355239312
          policy_loss: -0.04449494381745656
          total_loss: -0.05264832460218006
          vf_explained_var: -0.07362909615039825
          vf_loss: 0.0023236185399582608
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  iterations_since_restore: 199
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,199,4895.82,199000,0,0,0,354.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-08_23-46-01
  done: false
  episode_len_mean: 357.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 547
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9413750992880927
          entropy_coeff: 0.009999999999999998
          kl: 0.005765990347262948
          policy_loss: -0.041319140481452146
          total_loss: -0.055469112222393355
          vf_explained_var: 0.28440600633621216
          vf_loss: 0.000885228585684672
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 200
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,200,4913.02,200000,0,0,0,357.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-08_23-46-19
  done: false
  episode_len_mean: 359.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 550
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.139913849035899
          entropy_coeff: 0.009999999999999998
          kl: 0.009141376628273606
          policy_loss: -0.033133746973342364
          total_loss: -0.04515359430677361
          vf_explained_var: -0.0013742181472480297
          vf_loss: 0.0024375598408773336
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  iterations_since_restore: 201
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,201,4931.21,201000,0,0,0,359.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-08_23-46-36
  done: false
  episode_len_mean: 361.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 552
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9446562356419033
          entropy_coeff: 0.009999999999999998
          kl: 0.010849227468199689
          policy_loss: -0.030608383069435755
          total_loss: -0.04060257880224122
          vf_explained_var: 0.37085601687431335
          vf_loss: 0.0012137350982003327
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iterations_since_restore: 202
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,202,4948.02,202000,0,0,0,361.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-08_23-46-52
  done: false
  episode_len_mean: 364.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 554
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.0507110291057162
          entropy_coeff: 0.009999999999999998
          kl: 0.010364385559531284
          policy_loss: -0.03004898503422737
          total_loss: -0.041570094289879005
          vf_explained_var: -0.11731305718421936
          vf_loss: 0.0011155444417252308
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iterations_since_restore: 203
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,203,4964.15,203000,0,0,0,364.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-08_23-47-10
  done: false
  episode_len_mean: 365.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 557
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.970871881643931
          entropy_coeff: 0.009999999999999998
          kl: 0.01019075327838622
          policy_loss: -0.0021042980667617586
          total_loss: -0.011986908026867443
          vf_explained_var: -0.1903550922870636
          vf_loss: 0.0020875044910806336
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iterations_since_restore: 204
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,204,4982.09,204000,0,0,0,365.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-08_23-47-28
  done: false
  episode_len_mean: 367.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 559
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.450143665737576
          entropy_coeff: 0.009999999999999998
          kl: 0.011805466780085776
          policy_loss: -0.0625774886045191
          total_loss: -0.07552029366294542
          vf_explained_var: -0.48016777634620667
          vf_loss: 0.0025938563065008363
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iterations_since_restore: 205
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,205,4999.6,205000,0,0,0,367.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-08_23-47-46
  done: false
  episode_len_mean: 369.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 562
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.093579951922099
          entropy_coeff: 0.009999999999999998
          kl: 0.011894259839247558
          policy_loss: -0.12136340058512157
          total_loss: -0.13214091451631652
          vf_explained_var: -0.2569216191768646
          vf_loss: 0.0011260827421210706
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterations_since_restore: 206
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,206,5017.9,206000,0,0,0,369.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-08_23-48-06
  done: false
  episode_len_mean: 368.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 564
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.35212050014072
          entropy_coeff: 0.009999999999999998
          kl: 0.006787119141269828
          policy_loss: -0.05394005264259047
          total_loss: -0.07149619528402885
          vf_explained_var: -0.5327669978141785
          vf_loss: 0.0008110946846298045
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  iterations_since_restore: 207
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,207,5037.65,207000,0,0,0,368.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-08_23-48-24
  done: false
  episode_len_mean: 371.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 567
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.127187959353129
          entropy_coeff: 0.009999999999999998
          kl: 0.0091830381180196
          policy_loss: -0.07029301197164589
          total_loss: -0.08270169693148799
          vf_explained_var: -0.31933674216270447
          vf_loss: 0.0018898243608418853
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 208
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,208,5055.52,208000,0,0,0,371.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-08_23-48-42
  done: false
  episode_len_mean: 372.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 569
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.128081809149848
          entropy_coeff: 0.009999999999999998
          kl: 0.009056636463798777
          policy_loss: -0.04536636459330718
          total_loss: -0.05509566412203842
          vf_explained_var: -0.569203794002533
          vf_loss: 0.004674136627646577
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  iterations_since_restore: 209
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,209,5073.36,209000,0,0,0,372.2




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-08_23-49-19
  done: false
  episode_len_mean: 375.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 572
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6890759388605754
          entropy_coeff: 0.009999999999999998
          kl: 0.012238684793273367
          policy_loss: 0.02003354844119814
          total_loss: 0.01538645327091217
          vf_explained_var: 0.04330625385046005
          vf_loss: 0.0029499144891613266
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore: 210
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,210,5110.43,210000,0,0,0,375.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-08_23-49-38
  done: false
  episode_len_mean: 376.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 574
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6265977912478977
          entropy_coeff: 0.009999999999999998
          kl: 0.01362096973596911
          policy_loss: -0.11312981027488907
          total_loss: -0.11585530515553223
          vf_explained_var: -0.3814927935600281
          vf_loss: 0.0031970583626793492
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_since_restore: 211
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,211,5129.93,211000,0,0,0,376.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-08_23-50-00
  done: false
  episode_len_mean: 377.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 577
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7948154065344069
          entropy_coeff: 0.009999999999999998
          kl: 0.011538207485082018
          policy_loss: -0.02745834555890825
          total_loss: -0.034219989760054484
          vf_explained_var: -0.9859607219696045
          vf_loss: 0.0024246860295534132
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  iterations_since_restore: 212
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,212,5151.57,212000,0,0,0,377.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-08_23-50-21
  done: false
  episode_len_mean: 379.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 579
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.586272386709849
          entropy_coeff: 0.009999999999999998
          kl: 0.012483930838514625
          policy_loss: -0.06555477206905683
          total_loss: -0.06985044885012838
          vf_explained_var: 0.09630875289440155
          vf_loss: 0.0020870620080839014
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  iterations_since_restore: 213
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,213,5172.42,213000,0,0,0,379.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-08_23-50-44
  done: false
  episode_len_mean: 381.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 582
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4866483079062567
          entropy_coeff: 0.009999999999999998
          kl: 0.009945989494695178
          policy_loss: -0.10384828713205126
          total_loss: -0.11045960697034994
          vf_explained_var: 0.38042178750038147
          vf_loss: 0.0007024286183877848
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  iterations_since_restore: 214
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,214,5195.64,214000,0,0,0,381.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-08_23-51-05
  done: false
  episode_len_mean: 384.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 584
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.569332362545861
          entropy_coeff: 0.009999999999999998
          kl: 0.013317869609126672
          policy_loss: -0.10357590566078821
          total_loss: -0.10775468750960297
          vf_explained_var: 0.553209125995636
          vf_loss: 0.0014012861169046826
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iterations_since_restore: 215
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,215,5217.08,215000,0,0,0,384.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-08_23-51-26
  done: false
  episode_len_mean: 385.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 586
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7805109752549066
          entropy_coeff: 0.009999999999999998
          kl: 0.01859368338595561
          policy_loss: -0.02993193813082245
          total_loss: -0.03194144920756419
          vf_explained_var: -0.2560076117515564
          vf_loss: 0.001676022659133499
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterations_since_restore: 216
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,216,5237.21,216000,0,0,0,385.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-08_23-51-44
  done: false
  episode_len_mean: 387.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 588
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.718628055519528
          entropy_coeff: 0.009999999999999998
          kl: 0.013036060422234522
          policy_loss: -0.04172075113488568
          total_loss: -0.047122876511679755
          vf_explained_var: -0.22204221785068512
          vf_loss: 0.0018848962409214842
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iterations_since_restore: 217
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,217,5255.38,217000,0,0,0,387.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-08_23-52-06
  done: false
  episode_len_mean: 390.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 591
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7689127312766182
          entropy_coeff: 0.009999999999999998
          kl: 0.0158356938418984
          policy_loss: -0.13970339761839973
          total_loss: -0.1433224420166678
          vf_explained_var: -0.138417586684227
          vf_loss: 0.002044852697872557
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  iterations_since_restore: 218
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,218,5277.4,218000,0,0,0,390.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-08_23-52-28
  done: false
  episode_len_mean: 391.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 593
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6932314806514317
          entropy_coeff: 0.009999999999999998
          kl: 0.007986332580354761
          policy_loss: -0.07370991673734453
          total_loss: -0.08344456412725978
          vf_explained_var: -0.3227642774581909
          vf_loss: 0.001133042988335041
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iterations_since_restore: 219
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,219,5299.26,219000,0,0,0,391.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-08_23-52-53
  done: false
  episode_len_mean: 391.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 596
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6522711369726393
          entropy_coeff: 0.009999999999999998
          kl: 0.010361632666036751
          policy_loss: -0.1181590193675624
          total_loss: -0.12512491051521565
          vf_explained_var: -0.21403548121452332
          vf_loss: 0.0016884535137150023
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 220
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,220,5324.57,220000,0,0,0,391.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-08_23-53-16
  done: false
  episode_len_mean: 392.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 599
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6139974872271219
          entropy_coeff: 0.009999999999999998
          kl: 0.013586635079292808
          policy_loss: -0.07238817472631733
          total_loss: -0.07667845298225681
          vf_explained_var: -0.10170497745275497
          vf_loss: 0.0015323456451167457
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  iterations_since_restore: 221
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,221,5347.84,221000,0,0,0,392.5




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-08_23-53-59
  done: false
  episode_len_mean: 391.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 602
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.733352545897166
          entropy_coeff: 0.009999999999999998
          kl: 0.009498584969729704
          policy_loss: -0.07571673066251808
          total_loss: -0.0846298655288087
          vf_explained_var: -0.5719708800315857
          vf_loss: 0.0012073992201799733
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iterations_since_restore: 222
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,222,5390.22,222000,0,0,0,391.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-08_23-54-23
  done: false
  episode_len_mean: 391.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 605
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.481531102127499
          entropy_coeff: 0.009999999999999998
          kl: 0.013155790974960663
          policy_loss: -0.09647311348881986
          total_loss: -0.09976957212719652
          vf_explained_var: -0.07931693643331528
          vf_loss: 0.0015286764569787516
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  iterations_since_restore: 223
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,223,5414.89,223000,0,0,0,391.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-08_23-54-48
  done: false
  episode_len_mean: 390.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 608
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7646045234468248
          entropy_coeff: 0.009999999999999998
          kl: 0.010016959382621711
          policy_loss: -0.07991623195509116
          total_loss: -0.08891451847222116
          vf_explained_var: 0.046743620187044144
          vf_loss: 0.001041131716273311
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 224
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,224,5439.89,224000,0,0,0,390.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-08_23-55-13
  done: false
  episode_len_mean: 389.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 611
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5976068403985766
          entropy_coeff: 0.009999999999999998
          kl: 0.007853820744506537
          policy_loss: -0.09497283258371883
          total_loss: -0.10358700727423033
          vf_explained_var: -0.3246462047100067
          vf_loss: 0.0013978988443139112
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterations_since_restore: 225
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,225,5464.33,225000,0,0,0,389.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-08_23-55-36
  done: false
  episode_len_mean: 388.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 614
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.061367091867659
          entropy_coeff: 0.009999999999999998
          kl: 0.015342228228205378
          policy_loss: -0.026994712899128595
          total_loss: -0.03470217403438356
          vf_explained_var: -0.2453782558441162
          vf_loss: 0.0012557050122672486
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations_since_restore: 226
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,226,5487.73,226000,0,0,0,388.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-08_23-56-03
  done: false
  episode_len_mean: 384.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 617
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7568480398919848
          entropy_coeff: 0.009999999999999998
          kl: 0.01321476613016554
          policy_loss: -0.053818147629499435
          total_loss: -0.06084465359648069
          vf_explained_var: 0.7480401992797852
          vf_loss: 0.0005070124177210446
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterations_since_restore: 227
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,227,5514.06,227000,0,0,0,384.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-08_23-56-25
  done: false
  episode_len_mean: 385.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 620
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.838271849685245
          entropy_coeff: 0.009999999999999998
          kl: 0.008037209196423352
          policy_loss: -0.0039025996294286516
          total_loss: -0.015503759930531184
          vf_explained_var: -0.03727131709456444
          vf_loss: 0.0006783007162286796
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iterations_since_restore: 228
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,228,5536.86,228000,0,0,0,385.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-08_23-56-48
  done: false
  episode_len_mean: 386.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 623
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9181981258922154
          entropy_coeff: 0.009999999999999998
          kl: 0.00997355289197706
          policy_loss: -0.019735681638121606
          total_loss: -0.030222026217314932
          vf_explained_var: -0.05316132307052612
          vf_loss: 0.0011219697816866553
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations_since_restore: 229
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,229,5559.21,229000,0,0,0,386.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-08_23-57-15
  done: false
  episode_len_mean: 385.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 626
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8224906418058606
          entropy_coeff: 0.009999999999999998
          kl: 0.012567305391789792
          policy_loss: -0.09886733873022926
          total_loss: -0.10610195781207747
          vf_explained_var: -0.029106220230460167
          vf_loss: 0.0014469935927384843
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations_since_restore: 230
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,230,5586.14,230000,0,0,0,385.15




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-08_23-58-00
  done: false
  episode_len_mean: 379.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 630
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6705362598101299
          entropy_coeff: 0.009999999999999998
          kl: 0.0107373636000824
          policy_loss: -0.12058659858173794
          total_loss: -0.12824970740411018
          vf_explained_var: -0.15125243365764618
          vf_loss: 0.0008885692601324991
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iterations_since_restore: 231
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,231,5631.14,231000,0,0,0,379.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-08_23-58-27
  done: false
  episode_len_mean: 377.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 633
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.76548306412167
          entropy_coeff: 0.009999999999999998
          kl: 0.014806894184624575
          policy_loss: -0.08441724206010501
          total_loss: -0.0892392079035441
          vf_explained_var: -0.2844829857349396
          vf_loss: 0.0015888790050262792
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations_since_restore: 232
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,232,5658.74,232000,0,0,0,377.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-08_23-58-50
  done: false
  episode_len_mean: 378.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 636
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7955317099889119
          entropy_coeff: 0.009999999999999998
          kl: 0.013153292398692172
          policy_loss: -0.04051359382768472
          total_loss: -0.046924220770597456
          vf_explained_var: -0.08752724528312683
          vf_loss: 0.0015564093211044869
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  iterations_since_restore: 233
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,233,5681.85,233000,0,0,0,378.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-08_23-59-16
  done: false
  episode_len_mean: 377.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 639
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.739545800950792
          entropy_coeff: 0.009999999999999998
          kl: 0.01238185354835484
          policy_loss: -0.06284523569047451
          total_loss: -0.06989632236460845
          vf_explained_var: -0.6391010880470276
          vf_loss: 0.0009419039105220387
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  iterations_since_restore: 234
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,234,5707.24,234000,0,0,0,377.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-08_23-59-39
  done: false
  episode_len_mean: 376.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 642
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8171179864141676
          entropy_coeff: 0.009999999999999998
          kl: 0.011517052389534597
          policy_loss: -0.10426376606855127
          total_loss: -0.1126223930882083
          vf_explained_var: -0.022790569812059402
          vf_loss: 0.0010667908736245913
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  iterations_since_restore: 235
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,235,5730.04,235000,0,0,0,376.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-09_00-00-01
  done: false
  episode_len_mean: 371.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 645
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6759113563431633
          entropy_coeff: 0.009999999999999998
          kl: 0.012564149747758711
          policy_loss: -0.06797652648140987
          total_loss: -0.07379448885718981
          vf_explained_var: -0.726775050163269
          vf_loss: 0.0014002494695078995
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  iterations_since_restore: 236
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,236,5752.13,236000,0,0,0,371.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-09_00-00-25
  done: false
  episode_len_mean: 368.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 648
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6081858197848002
          entropy_coeff: 0.009999999999999998
          kl: 0.011790414906933222
          policy_loss: -0.036995476484298705
          total_loss: -0.04282025235394637
          vf_explained_var: 0.0709356740117073
          vf_loss: 0.0013037335153462159
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  iterations_since_restore: 237
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,237,5775.86,237000,0,0,0,368.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-09_00-00-42
  done: false
  episode_len_mean: 369.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 650
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.554398516813914
          entropy_coeff: 0.009999999999999998
          kl: 0.01224625380556069
          policy_loss: -0.06925020234452353
          total_loss: -0.0737399407558971
          vf_explained_var: -0.4592916667461395
          vf_loss: 0.0017547491944343266
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
  iterations_since_restore: 238
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,238,5793.69,238000,0,0,0,369.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-09_00-01-00
  done: false
  episode_len_mean: 370.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 652
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.405110432042016
          entropy_coeff: 0.009999999999999998
          kl: 0.011362356166752777
          policy_loss: -0.11085334666487244
          total_loss: -0.11537307502908839
          vf_explained_var: -0.30464476346969604
          vf_loss: 0.0009030901112257399
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  iterations_since_restore: 239
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,239,5811.22,239000,0,0,0,370.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-09_00-01-21
  done: false
  episode_len_mean: 370.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 654
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6150883846812778
          entropy_coeff: 0.009999999999999998
          kl: 0.008898424893926634
          policy_loss: 0.00038992445915937425
          total_loss: -0.007760358394847976
          vf_explained_var: -0.6041601300239563
          vf_loss: 0.0012433608877472579
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 240
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,240,5832.2,240000,0,0,0,370.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-09_00-01-38
  done: false
  episode_len_mean: 371.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 656
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5288211637073092
          entropy_coeff: 0.009999999999999998
          kl: 0.013160320223829933
          policy_loss: -0.11710625754462348
          total_loss: -0.12034841407504347
          vf_explained_var: -0.224093496799469
          vf_loss: 0.0020524360925062665
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  iterations_since_restore: 241
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,241,5849.23,241000,0,0,0,371.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-09_00-01-58
  done: false
  episode_len_mean: 371.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 659
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8750552349620395
          entropy_coeff: 0.009999999999999998
          kl: 0.013849567026598326
          policy_loss: -0.08306822919597229
          total_loss: -0.08950560939394765
          vf_explained_var: -0.7729384303092957
          vf_loss: 0.001796156524344244
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iterations_since_restore: 242
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,242,5869.21,242000,0,0,0,371.03




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-09_00-02-32
  done: false
  episode_len_mean: 372.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 661
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.347160679101944
          entropy_coeff: 0.009999999999999998
          kl: 0.011196062549628355
          policy_loss: -0.0349051576314701
          total_loss: -0.03839325304660532
          vf_explained_var: -0.6765496730804443
          vf_loss: 0.0014815035166167137
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  iterations_since_restore: 243
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,243,5902.79,243000,0,0,0,372.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-09_00-02-50
  done: false
  episode_len_mean: 376.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 663
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7740770194265578
          entropy_coeff: 0.009999999999999998
          kl: 0.00558278806343637
          policy_loss: 0.003586308616730902
          total_loss: -0.009343121821681658
          vf_explained_var: -0.4544554352760315
          vf_loss: 0.0005719092173320758
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  iterations_since_restore: 244
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,244,5921.22,244000,0,0,0,376.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-09_00-03-10
  done: false
  episode_len_mean: 377.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 665
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9677859902381898
          entropy_coeff: 0.009999999999999998
          kl: 0.013285287095859289
          policy_loss: -0.033392229076060986
          total_loss: -0.04013796601858404
          vf_explained_var: -0.6231948137283325
          vf_loss: 0.0028436087511686816
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
  iterations_since_restore: 245
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,245,5941.49,245000,0,0,0,377.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-09_00-03-32
  done: false
  episode_len_mean: 378.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 667
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7786248975329928
          entropy_coeff: 0.009999999999999998
          kl: 0.012459624459523633
          policy_loss: -0.09195598314205805
          total_loss: -0.099155918839905
          vf_explained_var: -0.5166304111480713
          vf_loss: 0.001124784840309682
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  iterations_since_restore: 246
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,246,5962.88,246000,0,0,0,378.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-09_00-03-54
  done: false
  episode_len_mean: 377.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 670
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6357086300849915
          entropy_coeff: 0.009999999999999998
          kl: 0.01447657402449237
          policy_loss: -0.07046615415149265
          total_loss: -0.07441507528225581
          vf_explained_var: -0.2301468551158905
          vf_loss: 0.001415017812461075
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
  iterations_since_restore: 247
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,247,5985.23,247000,0,0,0,377.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-09_00-04-11
  done: false
  episode_len_mean: 378.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 672
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.2669388903511896
          entropy_coeff: 0.009999999999999998
          kl: 0.007767562857736306
          policy_loss: -0.03527906284564071
          total_loss: -0.04083221132556598
          vf_explained_var: -0.37027406692504883
          vf_loss: 0.0012177492105466727
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  iterations_since_restore: 248
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,248,6002.06,248000,0,0,0,378.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-09_00-04-32
  done: false
  episode_len_mean: 378.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 674
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8179411013921103
          entropy_coeff: 0.009999999999999998
          kl: 0.01328712669068533
          policy_loss: -0.07966178961926036
          total_loss: -0.08650521180695958
          vf_explained_var: -0.5728130340576172
          vf_loss: 0.0012460776578841937
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  iterations_since_restore: 249
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,249,6022.61,249000,0,0,0,378.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-09_00-04-56
  done: false
  episode_len_mean: 377.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 677
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5262836376825968
          entropy_coeff: 0.009999999999999998
          kl: 0.010253221803989652
          policy_loss: -0.09923874400556087
          total_loss: -0.10504161094625791
          vf_explained_var: 0.3215825855731964
          vf_loss: 0.0016739316245851417
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations_since_restore: 250
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,250,6047.37,250000,0,0,0,377.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-09_00-05-13
  done: false
  episode_len_mean: 378.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 679
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.731455930074056
          entropy_coeff: 0.009999999999999998
          kl: 0.01006168535501308
          policy_loss: -0.003461197018623352
          total_loss: -0.01220259384976493
          vf_explained_var: -0.24401992559432983
          vf_loss: 0.0009325718879052955
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
  iterations_since_restore: 251
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,251,6064.34,251000,0,0,0,378.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-09_00-05-35
  done: false
  episode_len_mean: 380.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 682
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7346445520718892
          entropy_coeff: 0.009999999999999998
          kl: 0.011728554389566683
          policy_loss: 0.0006514011985725827
          total_loss: -0.006942821997735236
          vf_explained_var: -0.19423681497573853
          vf_loss: 0.00084585255859161
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  iterations_since_restore: 252
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,252,6085.74,252000,0,0,0,380.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-09_00-05-55
  done: false
  episode_len_mean: 380.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 684
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4413700646824308
          entropy_coeff: 0.009999999999999998
          kl: 0.009715800673492205
          policy_loss: -0.07933141315976779
          total_loss: -0.08517063334584236
          vf_explained_var: -0.7976051568984985
          vf_loss: 0.0011965453307816966
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
  iterations_since_restore: 253
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,253,6106.19,253000,0,0,0,380.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-09_00-06-15
  done: false
  episode_len_mean: 381.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 686
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7491836905479432
          entropy_coeff: 0.009999999999999998
          kl: 0.0114123954579229
          policy_loss: -0.055119005673461494
          total_loss: -0.062465087531341446
          vf_explained_var: -0.44117873907089233
          vf_loss: 0.0014794655855641598
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
  iterations_since_restore: 254
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,254,6125.68,254000,0,0,0,381.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-09_00-06-31
  done: false
  episode_len_mean: 382.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 688
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.628876378801134
          entropy_coeff: 0.009999999999999998
          kl: 0.012683409481053403
          policy_loss: -0.08466082714084122
          total_loss: -0.09026669804006815
          vf_explained_var: -0.4434787631034851
          vf_loss: 0.0010514285298995675
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  iterations_since_restore: 255
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,255,6142.03,255000,0,0,0,382.51




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-09_00-07-12
  done: false
  episode_len_mean: 380.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 691
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.563559075196584
          entropy_coeff: 0.009999999999999998
          kl: 0.012872140019636566
          policy_loss: -0.04568884138845735
          total_loss: -0.05030727895597617
          vf_explained_var: -0.2628443241119385
          vf_loss: 0.001242371317413118
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  iterations_since_restore: 256
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,256,6182.81,256000,0,0,0,380.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-09_00-07-32
  done: false
  episode_len_mean: 380.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 693
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.461855505572425
          entropy_coeff: 0.009999999999999998
          kl: 0.012211685607883677
          policy_loss: -0.04063227412601312
          total_loss: -0.04485586591892772
          vf_explained_var: 0.08769919723272324
          vf_loss: 0.0011217161709080553
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
  iterations_since_restore: 257
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,257,6203.22,257000,0,0,0,380.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-09_00-07-53
  done: false
  episode_len_mean: 382.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 696
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6220140669080947
          entropy_coeff: 0.009999999999999998
          kl: 0.013877631854859166
          policy_loss: -0.08022876750263903
          total_loss: -0.0845635686069727
          vf_explained_var: -0.31556814908981323
          vf_loss: 0.0013470125080655433
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000
  iterations_since_restore: 258
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,258,6223.79,258000,0,0,0,382.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-09_00-08-10
  done: false
  episode_len_mean: 384.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 697
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6307872441079883
          entropy_coeff: 0.009999999999999998
          kl: 0.013176658856579386
          policy_loss: -0.08716577326671945
          total_loss: -0.09181345413542456
          vf_explained_var: -1.0
          vf_loss: 0.001654166325331769
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000
  iterations_since_restore: 259
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,259,6240.51,259000,0,0,0,384.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-09_00-08-36
  done: false
  episode_len_mean: 385.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 700
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8163861486646864
          entropy_coeff: 0.009999999999999998
          kl: 0.009889040704773637
          policy_loss: -0.05339901954349544
          total_loss: -0.06329461745917797
          vf_explained_var: -0.13184043765068054
          vf_loss: 0.0007587726822950774
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 260
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,260,6266.33,260000,0,0,0,385.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-09_00-09-01
  done: false
  episode_len_mean: 384.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 703
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4973848130967882
          entropy_coeff: 0.009999999999999998
          kl: 0.00820268471638291
          policy_loss: -0.08882674200253354
          total_loss: -0.09663951786028015
          vf_explained_var: 0.3179364502429962
          vf_loss: 0.000932156579155061
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000
  iterations_since_restore: 261
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,261,6291.92,261000,0,0,0,384.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-09_00-09-24
  done: false
  episode_len_mean: 386.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 706
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9758890827496847
          entropy_coeff: 0.009999999999999998
          kl: 0.012788573912533173
          policy_loss: -0.05698962658643723
          total_loss: -0.06620773085289532
          vf_explained_var: 0.04283400997519493
          vf_loss: 0.0008294649769797818
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000
  iterations_since_restore: 262
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,262,6314.35,262000,0,0,0,386.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-09_00-09-46
  done: false
  episode_len_mean: 388.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 709
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7925794217321607
          entropy_coeff: 0.009999999999999998
          kl: 0.011336694854415125
          policy_loss: -0.06656349601431025
          total_loss: -0.07475037002522085
          vf_explained_var: -0.6453907489776611
          vf_loss: 0.001130118704168126
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000
  iterations_since_restore: 263
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,263,6336.69,263000,0,0,0,388.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-09_00-10-11
  done: false
  episode_len_mean: 388.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 711
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8340825186835394
          entropy_coeff: 0.009999999999999998
          kl: 0.011221354304014457
          policy_loss: -0.056554338791304164
          total_loss: -0.0647827957653337
          vf_explained_var: -0.6046557426452637
          vf_loss: 0.0015911534338051247
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  iterations_since_restore: 264
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,264,6362.05,264000,0,0,0,388.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-09_00-10-34
  done: false
  episode_len_mean: 390.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 714
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6832299285464816
          entropy_coeff: 0.009999999999999998
          kl: 0.01268698470477108
          policy_loss: -0.09604472145438195
          total_loss: -0.10206562189592255
          vf_explained_var: -0.9467944502830505
          vf_loss: 0.001177221617480326
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
  iterations_since_restore: 265
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,265,6385.04,265000,0,0,0,390.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-09_00-10-57
  done: false
  episode_len_mean: 393.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 717
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9262133704291449
          entropy_coeff: 0.009999999999999998
          kl: 0.008017343665598
          policy_loss: -0.02606847389704651
          total_loss: -0.038536096695396635
          vf_explained_var: -0.9572851657867432
          vf_loss: 0.0007063407583498499
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
  iterations_since_restore: 266
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,266,6407.38,266000,0,0,0,393.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-09_00-11-16
  done: false
  episode_len_mean: 394.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 719
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6051343732410006
          entropy_coeff: 0.009999999999999998
          kl: 0.012609348312112603
          policy_loss: -0.13973849008066785
          total_loss: -0.14512939490377902
          vf_explained_var: -0.02987263724207878
          vf_loss: 0.001085214530919782
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000
  iterations_since_restore: 267
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,267,6426.67,267000,0,0,0,394.16




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-09_00-11-52
  done: false
  episode_len_mean: 397.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 721
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.64510198964013
          entropy_coeff: 0.009999999999999998
          kl: 0.012710726786664218
          policy_loss: -0.0730873561774691
          total_loss: -0.0784934392819802
          vf_explained_var: -0.6834625601768494
          vf_loss: 0.0013927270917014943
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  iterations_since_restore: 268
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,268,6462.48,268000,0,0,0,397.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-09_00-12-11
  done: false
  episode_len_mean: 398.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 723
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.638767613304986
          entropy_coeff: 0.009999999999999998
          kl: 0.01215967640741869
          policy_loss: -0.026563906855881215
          total_loss: -0.03239309562163221
          vf_explained_var: -0.4289571940898895
          vf_loss: 0.0013247317563380217
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000
  iterations_since_restore: 269
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,269,6481.74,269000,0,0,0,398.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-09_00-12-27
  done: false
  episode_len_mean: 405.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 725
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.12764193283187
          entropy_coeff: 0.009999999999999998
          kl: 0.008374016027288178
          policy_loss: -0.06702577504846785
          total_loss: -0.07131223964194457
          vf_explained_var: -0.5749039053916931
          vf_loss: 0.0006309354512874658
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  iterations_since_restore: 270
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,270,6497.2,270000,0,0,0,405.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-09_00-12-47
  done: false
  episode_len_mean: 407.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 727
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6507673899332682
          entropy_coeff: 0.009999999999999998
          kl: 0.012642594718008935
          policy_loss: 0.010930421907040808
          total_loss: 0.005765815161996418
          vf_explained_var: -0.8855189085006714
          vf_loss: 0.0017425969954476589
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  iterations_since_restore: 271
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,271,6517.36,271000,0,0,0,407.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-09_00-13-10
  done: false
  episode_len_mean: 410.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 730
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7707393089930217
          entropy_coeff: 0.009999999999999998
          kl: 0.015332097298335809
          policy_loss: -0.04324776315026813
          total_loss: -0.04797696388430066
          vf_explained_var: -0.48344653844833374
          vf_loss: 0.0013353816864158337
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  iterations_since_restore: 272
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,272,6540.83,272000,0,0,0,410.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-09_00-13-32
  done: false
  episode_len_mean: 413.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 732
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.0520481136110096
          entropy_coeff: 0.009999999999999998
          kl: 0.011125443137630553
          policy_loss: -0.06520258705649111
          total_loss: -0.07662039705448681
          vf_explained_var: -1.0
          vf_loss: 0.0006542858612697778
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
  iterations_since_restore: 273
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,273,6562.26,273000,0,0,0,413.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-09_00-13-55
  done: false
  episode_len_mean: 414.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 735
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.626021879249149
          entropy_coeff: 0.009999999999999998
          kl: 0.009888130236583276
          policy_loss: -0.09450192095504867
          total_loss: -0.10281886905431747
          vf_explained_var: -0.5968244075775146
          vf_loss: 0.00043447275400265224
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  iterations_since_restore: 274
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,274,6585.7,274000,0,0,0,414.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-09_00-14-17
  done: false
  episode_len_mean: 415.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 737
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7710256112946405
          entropy_coeff: 0.009999999999999998
          kl: 0.011423518262182715
          policy_loss: -0.061758849355909556
          total_loss: -0.06987117764850458
          vf_explained_var: -0.667792797088623
          vf_loss: 0.0009231949978533925
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
  iterations_since_restore: 275
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,275,6607.61,275000,0,0,0,415.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-09_00-14-40
  done: false
  episode_len_mean: 417.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 740
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.852356935871972
          entropy_coeff: 0.009999999999999998
          kl: 0.013103157228998422
          policy_loss: -0.07876043572194047
          total_loss: -0.08663788528905975
          vf_explained_var: -0.5366376638412476
          vf_loss: 0.0006959127811973707
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
  iterations_since_restore: 276
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,276,6630.37,276000,0,0,0,417.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-09_00-15-03
  done: false
  episode_len_mean: 417.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 743
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.928521000014411
          entropy_coeff: 0.009999999999999998
          kl: 0.012363049109154783
          policy_loss: 0.037393170843521756
          total_loss: 0.028048423636290763
          vf_explained_var: -0.93914794921875
          vf_loss: 0.0005522712725602711
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
  iterations_since_restore: 277
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,277,6653.73,277000,0,0,0,417.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-09_00-15-27
  done: false
  episode_len_mean: 417.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 746
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.02617109881507
          entropy_coeff: 0.009999999999999998
          kl: 0.010800339955875534
          policy_loss: -0.061352411781748134
          total_loss: -0.07307503826191855
          vf_explained_var: -1.0
          vf_loss: 0.0003375748663933741
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000
  iterations_since_restore: 278
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,278,6677.94,278000,0,0,0,417.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-09_00-15-51
  done: false
  episode_len_mean: 418.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 748
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8389156460762024
          entropy_coeff: 0.009999999999999998
          kl: 0.011497371681653096
          policy_loss: -0.0901823606962959
          total_loss: -0.09880964126851824
          vf_explained_var: -0.7222591042518616
          vf_loss: 0.0010310596456596006
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
  iterations_since_restore: 279
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,279,6701.63,279000,0,0,0,418.02




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-09_00-16-34
  done: false
  episode_len_mean: 411.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 752
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8927861637539334
          entropy_coeff: 0.009999999999999998
          kl: 0.013545322416290823
          policy_loss: -0.06319961779647404
          total_loss: -0.07136642220947477
          vf_explained_var: -0.33096522092819214
          vf_loss: 0.00047507740495752337
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 280
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,280,6744.33,280000,0,0,0,411.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-09_00-16-57
  done: false
  episode_len_mean: 409.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 754
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7514052748680116
          entropy_coeff: 0.009999999999999998
          kl: 0.008517843901198575
          policy_loss: -0.05832511844734351
          total_loss: -0.06864347805579503
          vf_explained_var: -0.807798445224762
          vf_loss: 0.0007274554308322775
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
  iterations_since_restore: 281
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,281,6767.22,281000,0,0,0,409.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-09_00-17-19
  done: false
  episode_len_mean: 407.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 757
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8225197275479634
          entropy_coeff: 0.009999999999999998
          kl: 0.011630069845888505
          policy_loss: -0.0850439285652505
          total_loss: -0.09363766176005205
          vf_explained_var: -0.9821060299873352
          vf_loss: 0.0007998785569927552
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000
  iterations_since_restore: 282
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,282,6789.23,282000,0,0,0,407.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-09_00-17-40
  done: false
  episode_len_mean: 406.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 760
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7946323487493727
          entropy_coeff: 0.009999999999999998
          kl: 0.012474057131676335
          policy_loss: -0.02398550216522482
          total_loss: -0.03134878517852889
          vf_explained_var: -0.9089443683624268
          vf_loss: 0.0011105560083200948
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  iterations_since_restore: 283
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,283,6810.44,283000,0,0,0,406.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-09_00-18-01
  done: false
  episode_len_mean: 403.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 762
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7738353027237785
          entropy_coeff: 0.009999999999999998
          kl: 0.012313045713275173
          policy_loss: -0.1114312042378717
          total_loss: -0.11861675729354222
          vf_explained_var: -0.13064178824424744
          vf_loss: 0.0012025770362621795
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  iterations_since_restore: 284
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,284,6831.26,284000,0,0,0,403.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-09_00-18-23
  done: false
  episode_len_mean: 400.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 765
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9119793587260776
          entropy_coeff: 0.009999999999999998
          kl: 0.010890038625018267
          policy_loss: -0.08454261186222235
          total_loss: -0.09462076978137096
          vf_explained_var: -0.990401566028595
          vf_loss: 0.000772012882387369
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  iterations_since_restore: 285
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,285,6853.64,285000,0,0,0,400.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-09_00-18-46
  done: false
  episode_len_mean: 398.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 768
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7042843964364793
          entropy_coeff: 0.009999999999999998
          kl: 0.011161935968859785
          policy_loss: -0.1386478692293167
          total_loss: -0.14668001954754192
          vf_explained_var: -0.7069984674453735
          vf_loss: 0.0005345998965721163
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  iterations_since_restore: 286
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,286,6876.66,286000,0,0,0,398.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-09_00-19-08
  done: false
  episode_len_mean: 398.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 770
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8019999768998889
          entropy_coeff: 0.009999999999999998
          kl: 0.010381800330522613
          policy_loss: -0.05763991746223635
          total_loss: -0.06691950567894511
          vf_explained_var: -0.4507378339767456
          vf_loss: 0.0008567314472101215
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000
  iterations_since_restore: 287
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,287,6898.04,287000,0,0,0,398


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-09_00-19-30
  done: false
  episode_len_mean: 394.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 773
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7850121670299106
          entropy_coeff: 0.009999999999999998
          kl: 0.01292825833416534
          policy_loss: -0.08695682547986508
          total_loss: -0.09415565878152847
          vf_explained_var: -0.816288411617279
          vf_loss: 0.0008338909974554554
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000
  iterations_since_restore: 288
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,288,6920.33,288000,0,0,0,394.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-09_00-19-54
  done: false
  episode_len_mean: 393.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 776
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9464898665746053
          entropy_coeff: 0.009999999999999998
          kl: 0.012206249290297332
          policy_loss: -0.1157708646522628
          total_loss: -0.12522059823903772
          vf_explained_var: -0.8162659406661987
          vf_loss: 0.0007460475938084225
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000
  iterations_since_restore: 289
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,289,6943.96,289000,0,0,0,393.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-09_00-20-18
  done: false
  episode_len_mean: 391.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 779
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.0088380853335064
          entropy_coeff: 0.009999999999999998
          kl: 0.009648721719825134
          policy_loss: -0.06506083388295439
          total_loss: -0.07740594227280882
          vf_explained_var: -0.8745789527893066
          vf_loss: 0.0004162746151754012
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  iterations_since_restore: 290
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,290,6968.35,290000,0,0,0,391.64




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-09_00-20-59
  done: false
  episode_len_mean: 389.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 782
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7622630066341824
          entropy_coeff: 0.009999999999999998
          kl: 0.00867064954517771
          policy_loss: -0.028927989304065704
          total_loss: -0.03924024978445636
          vf_explained_var: -0.5182207226753235
          vf_loss: 0.0007260928748615293
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
  iterations_since_restore: 291
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,291,7008.77,291000,0,0,0,389.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-09_00-21-18
  done: false
  episode_len_mean: 388.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 784
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9131211638450623
          entropy_coeff: 0.009999999999999998
          kl: 0.012266410247786888
          policy_loss: -0.07795798587095407
          total_loss: -0.0872044574469328
          vf_explained_var: -0.9253526329994202
          vf_loss: 0.0005699335704169547
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  iterations_since_restore: 292
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,292,7028.64,292000,0,0,0,388.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-09_00-21-39
  done: false
  episode_len_mean: 388.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 786
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.756832480430603
          entropy_coeff: 0.009999999999999998
          kl: 0.010544599168689217
          policy_loss: -0.07393519755245911
          total_loss: -0.08305062063090089
          vf_explained_var: -0.9892458319664001
          vf_loss: 0.000445598540116205
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  iterations_since_restore: 293
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,293,7048.68,293000,0,0,0,388.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-09_00-22-00
  done: false
  episode_len_mean: 385.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 789
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6130020327038235
          entropy_coeff: 0.009999999999999998
          kl: 0.012172460193899464
          policy_loss: -0.0537848490393824
          total_loss: -0.06017276210089525
          vf_explained_var: -0.5343769788742065
          vf_loss: 0.0004986460815416649
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000
  iterations_since_restore: 294
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,294,7069.96,294000,0,0,0,385.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-09_00-22-23
  done: false
  episode_len_mean: 384.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 792
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6832450853453742
          entropy_coeff: 0.009999999999999998
          kl: 0.007324531078124726
          policy_loss: -0.12686635015739334
          total_loss: -0.13783375471830367
          vf_explained_var: -1.0
          vf_loss: 0.00030297922413511616
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000
  iterations_since_restore: 295
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,295,7092.95,295000,0,0,0,384.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-09_00-22-42
  done: false
  episode_len_mean: 385.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 794
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4170918941497803
          entropy_coeff: 0.009999999999999998
          kl: 0.011652397299461844
          policy_loss: -0.07804083170162307
          total_loss: -0.08252408545878198
          vf_explained_var: 0.10331738740205765
          vf_loss: 0.0008391272184477809
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  iterations_since_restore: 296
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,296,7112.44,296000,0,0,0,385.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-09_00-23-08
  done: false
  episode_len_mean: 382.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 797
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6876734640863207
          entropy_coeff: 0.009999999999999998
          kl: 0.0105397017847067
          policy_loss: -0.03085826151072979
          total_loss: -0.039208494789070555
          vf_explained_var: -0.966795802116394
          vf_loss: 0.0005229150806877038
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000
  iterations_since_restore: 297
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,297,7138.53,297000,0,0,0,382.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-09_00-23-31
  done: false
  episode_len_mean: 380.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 800
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7026645090844896
          entropy_coeff: 0.009999999999999998
          kl: 0.010650962046561713
          policy_loss: -0.04413043264713552
          total_loss: -0.05246190449429883
          vf_explained_var: -0.6896674036979675
          vf_loss: 0.000607099813189254
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000
  iterations_since_restore: 298
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,298,7160.61,298000,0,0,0,380.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-09_00-23-52
  done: false
  episode_len_mean: 383.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 803
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6418273833062913
          entropy_coeff: 0.009999999999999998
          kl: 0.011222869563165242
          policy_loss: -0.024066598816878264
          total_loss: -0.030874094346331225
          vf_explained_var: -0.3294321894645691
          vf_loss: 0.001088412559733519
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
  iterations_since_restore: 299
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,299,7181.64,299000,0,0,0,383.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-09_00-24-11
  done: false
  episode_len_mean: 385.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 805
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5700800524817573
          entropy_coeff: 0.009999999999999998
          kl: 0.015686822163716328
          policy_loss: -0.06983172266433636
          total_loss: -0.07255363735473819
          vf_explained_var: -0.47095954418182373
          vf_loss: 0.001066702775683047
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 300
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,300,7200.94,300000,0,0,0,385.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-09_00-24-33
  done: false
  episode_len_mean: 383.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 808
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.736489995320638
          entropy_coeff: 0.009999999999999998
          kl: 0.01110171818770035
          policy_loss: -0.0854199760593474
          total_loss: -0.09341920348298219
          vf_explained_var: -0.4940958619117737
          vf_loss: 0.000935303834396311
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000
  iterations_since_restore: 301
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,301,7222.96,301000,0,0,0,383.67




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-09_00-25-14
  done: false
  episode_len_mean: 382.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 811
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7734266850683424
          entropy_coeff: 0.009999999999999998
          kl: 0.010583590997111496
          policy_loss: -0.05192798158774773
          total_loss: -0.06106557501479983
          vf_explained_var: -0.9488170146942139
          vf_loss: 0.0005597609716157119
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  iterations_since_restore: 302
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,302,7264.45,302000,0,0,0,382.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-09_00-25-39
  done: false
  episode_len_mean: 381.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 814
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.0087392237451343
          entropy_coeff: 0.009999999999999998
          kl: 0.011050479875335344
          policy_loss: -0.05150007644875182
          total_loss: -0.06271656203187174
          vf_explained_var: -0.9411890506744385
          vf_loss: 0.0004794469688527493
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  iterations_since_restore: 303
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,303,7288.84,303000,0,0,0,381.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-09_00-26-02
  done: false
  episode_len_mean: 380.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 817
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9149083256721497
          entropy_coeff: 0.009999999999999998
          kl: 0.011376570473767838
          policy_loss: -0.04978011215312613
          total_loss: -0.05957509892889195
          vf_explained_var: -0.3930683135986328
          vf_loss: 0.0007150156019229649
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  iterations_since_restore: 304
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,304,7312.28,304000,0,0,0,380.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-09_00-26-28
  done: false
  episode_len_mean: 376.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 820
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9104323228200277
          entropy_coeff: 0.009999999999999998
          kl: 0.010745778111922672
          policy_loss: -0.055268411545289886
          total_loss: -0.06541533412204849
          vf_explained_var: -0.6130450963973999
          vf_loss: 0.0007973252721260198
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
  iterations_since_restore: 305
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,305,7338.36,305000,0,0,0,376.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-09_00-26-51
  done: false
  episode_len_mean: 373.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 823
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8535718361536662
          entropy_coeff: 0.009999999999999998
          kl: 0.01136583072624045
          policy_loss: -0.06008200320518679
          total_loss: -0.06934174884938532
          vf_explained_var: -1.0
          vf_loss: 0.0006450425904606365
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  iterations_since_restore: 306
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,306,7361.23,306000,0,0,0,373.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-09_00-27-15
  done: false
  episode_len_mean: 366.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 825
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9533442815144857
          entropy_coeff: 0.009999999999999998
          kl: 0.008115230940583086
          policy_loss: -0.013705471613340908
          total_loss: -0.02669041794207361
          vf_explained_var: -0.9502633213996887
          vf_loss: 0.00038599424491015576
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  iterations_since_restore: 307
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,307,7384.51,307000,0,0,0,366.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-09_00-27-37
  done: false
  episode_len_mean: 366.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 828
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9214968429671393
          entropy_coeff: 0.009999999999999998
          kl: 0.009878476423431022
          policy_loss: -0.07511285800072882
          total_loss: -0.08623501127585768
          vf_explained_var: -0.8549063205718994
          vf_loss: 0.0005913482201220985
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
  iterations_since_restore: 308
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,308,7407.37,308000,0,0,0,366.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-09_00-27-59
  done: false
  episode_len_mean: 365.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 831
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8869064754909939
          entropy_coeff: 0.009999999999999998
          kl: 0.010486844372233782
          policy_loss: -0.035806994874858195
          total_loss: -0.046229132016499835
          vf_explained_var: -1.0
          vf_loss: 0.00048348121862444614
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
  iterations_since_restore: 309
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,309,7429.2,309000,0,0,0,365.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-09_00-28-21
  done: false
  episode_len_mean: 365.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 833
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8469131853845384
          entropy_coeff: 0.009999999999999998
          kl: 0.010544590834221667
          policy_loss: -0.0447384312024547
          total_loss: -0.05452249638943209
          vf_explained_var: -0.6574216485023499
          vf_loss: 0.0006777659304336542
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  iterations_since_restore: 310
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,310,7451.09,310000,0,0,0,365.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-09_00-28-46
  done: false
  episode_len_mean: 364.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 836
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.905866391128964
          entropy_coeff: 0.009999999999999998
          kl: 0.014726035256773903
          policy_loss: -0.02622449884398116
          total_loss: -0.03361793199761046
          vf_explained_var: -0.8779258728027344
          vf_loss: 0.0004826481748346446
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
  iterations_since_restore: 311
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,311,7475.9,311000,0,0,0,364.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-09_00-29-07
  done: false
  episode_len_mean: 364.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 839
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7964871459537082
          entropy_coeff: 0.009999999999999998
          kl: 0.012881443429848843
          policy_loss: -0.10083130529771249
          total_loss: -0.10858671019474665
          vf_explained_var: -0.5143560171127319
          vf_loss: 0.0004276204065212773
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  iterations_since_restore: 312
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,312,7496.84,312000,0,0,0,364.29




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-09_00-29-48
  done: false
  episode_len_mean: 363.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 842
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6430436836348639
          entropy_coeff: 0.009999999999999998
          kl: 0.010112695920485897
          policy_loss: -0.05397223879893621
          total_loss: -0.06225619138114982
          vf_explained_var: -0.8519371151924133
          vf_loss: 0.00046715445318518
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000
  iterations_since_restore: 313
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,313,7538.02,313000,0,0,0,363.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-09_00-30-09
  done: false
  episode_len_mean: 364.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 844
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.853509279092153
          entropy_coeff: 0.009999999999999998
          kl: 0.013038012988602353
          policy_loss: -0.07094901195830769
          total_loss: -0.07884810502744383
          vf_explained_var: -0.7360120415687561
          vf_loss: 0.0007352594118047919
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000
  iterations_since_restore: 314
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,314,7558.54,314000,0,0,0,364.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-09_00-30-36
  done: false
  episode_len_mean: 361.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 848
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8362863116794161
          entropy_coeff: 0.009999999999999998
          kl: 0.01166912482249164
          policy_loss: -0.06853512496583991
          total_loss: -0.07752014690389236
          vf_explained_var: 0.10760511457920074
          vf_loss: 0.0005165999748795811
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  iterations_since_restore: 315
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,315,7586.15,315000,0,0,0,361.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-09_00-31-01
  done: false
  episode_len_mean: 360.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 851
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7085336446762085
          entropy_coeff: 0.009999999999999998
          kl: 0.013271993450941742
          policy_loss: -0.029004184239440494
          total_loss: -0.035535034123394225
          vf_explained_var: -0.17631298303604126
          vf_loss: 0.0004760673353707211
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  iterations_since_restore: 316
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,316,7611.16,316000,0,0,0,360.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-09_00-31-26
  done: false
  episode_len_mean: 361.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 854
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.54807774093416
          entropy_coeff: 0.009999999999999998
          kl: 0.009992405578067753
          policy_loss: -0.06843603832854164
          total_loss: -0.07527312346630627
          vf_explained_var: 0.1346280723810196
          vf_loss: 0.0010557074510466513
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
  iterations_since_restore: 317
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,317,7635.58,317000,0,0,0,361.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-09_00-31-54
  done: false
  episode_len_mean: 358.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 858
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6232091267903646
          entropy_coeff: 0.009999999999999998
          kl: 0.011610939806949558
          policy_loss: -0.11330421726322837
          total_loss: -0.12035181493394904
          vf_explained_var: -0.7710806727409363
          vf_loss: 0.0003674355775324835
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  iterations_since_restore: 318
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,318,7663.91,318000,0,0,0,358.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-09_00-32-23
  done: false
  episode_len_mean: 354.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 861
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.514406865172916
          entropy_coeff: 0.009999999999999998
          kl: 0.012298572245954473
          policy_loss: -0.058827646014591055
          total_loss: -0.06420852372215854
          vf_explained_var: 0.2017059326171875
          vf_loss: 0.0004239611756121222
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  iterations_since_restore: 319
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,319,7692.36,319000,0,0,0,354.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-09_00-32-51
  done: false
  episode_len_mean: 350.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 865
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7217505560980904
          entropy_coeff: 0.009999999999999998
          kl: 0.013600246198970625
          policy_loss: -0.09186961522532834
          total_loss: -0.09821269063072072
          vf_explained_var: 0.04528075456619263
          vf_loss: 0.0005467440283003573
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 320
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,320,7721.1,320000,0,0,0,350.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-09_00-33-15
  done: false
  episode_len_mean: 350.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 868
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9329006565941704
          entropy_coeff: 0.009999999999999998
          kl: 0.00931215237635184
          policy_loss: -0.060977428696221775
          total_loss: -0.0726295555010438
          vf_explained_var: -0.6111246943473816
          vf_loss: 0.0006054641636890463
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  iterations_since_restore: 321
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,321,7744.48,321000,0,0,0,350.85




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-09_00-33-59
  done: false
  episode_len_mean: 348.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 871
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7267248524559868
          entropy_coeff: 0.009999999999999998
          kl: 0.009222862405039538
          policy_loss: -0.011647725022501415
          total_loss: -0.021567416356669533
          vf_explained_var: -0.14605213701725006
          vf_loss: 0.0003439446990442876
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
  iterations_since_restore: 322
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,322,7788.51,322000,0,0,0,348.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-09_00-34-27
  done: false
  episode_len_mean: 344.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 875
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7335978428522745
          entropy_coeff: 0.009999999999999998
          kl: 0.012172504514493306
          policy_loss: -0.060346900692416564
          total_loss: -0.06779833056239618
          vf_explained_var: -0.7659696936607361
          vf_loss: 0.0006410504874010156
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000
  iterations_since_restore: 323
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,323,7816.19,323000,0,0,0,344.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-09_00-34-55
  done: false
  episode_len_mean: 342.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 878
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5696959177652994
          entropy_coeff: 0.009999999999999998
          kl: 0.009243244195162327
          policy_loss: 0.011949452757835387
          total_loss: 0.0035316056882341703
          vf_explained_var: -0.12246712297201157
          vf_loss: 0.00026002328627833374
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
  iterations_since_restore: 324
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,324,7844.6,324000,0,0,0,342.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-09_00-35-22
  done: false
  episode_len_mean: 342.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 881
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8503638863563538
          entropy_coeff: 0.009999999999999998
          kl: 0.018801567069850953
          policy_loss: -0.04467717707157135
          total_loss: -0.04820996444258425
          vf_explained_var: -0.07489020377397537
          vf_loss: 0.0006934147906981202
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
  iterations_since_restore: 325
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,325,7871.09,325000,0,0,0,342.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-09_00-35-46
  done: false
  episode_len_mean: 339.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 884
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.893311325709025
          entropy_coeff: 0.009999999999999998
          kl: 0.010150090403545696
          policy_loss: -0.028413507994264363
          total_loss: -0.03902888387027714
          vf_explained_var: -0.38029754161834717
          vf_loss: 0.0006100131826113082
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  iterations_since_restore: 326
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,326,7895.54,326000,0,0,0,339.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-09_00-36-11
  done: false
  episode_len_mean: 335.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 888
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6172195156415303
          entropy_coeff: 0.009999999999999998
          kl: 0.007920017103158723
          policy_loss: -0.11680289999478394
          total_loss: -0.12663115192618635
          vf_explained_var: -0.3839016258716583
          vf_loss: 0.00032968067923017466
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000
  iterations_since_restore: 327
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,327,7920.76,327000,0,0,0,335.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-09_00-36-38
  done: false
  episode_len_mean: 333.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 891
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.731450609366099
          entropy_coeff: 0.009999999999999998
          kl: 0.014458362695691701
          policy_loss: -0.0589182981600364
          total_loss: -0.06470945067703723
          vf_explained_var: -0.5175126194953918
          vf_loss: 0.0005440339103289362
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  iterations_since_restore: 328
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,328,7947.95,328000,0,0,0,333.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-09_00-37-04
  done: false
  episode_len_mean: 330.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 894
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6756452017360264
          entropy_coeff: 0.009999999999999998
          kl: 0.010686182510423473
          policy_loss: -0.053401436149660086
          total_loss: -0.06135055305332773
          vf_explained_var: -0.9700965285301208
          vf_loss: 0.0006925161287654191
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
  iterations_since_restore: 329
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,329,7973.47,329000,0,0,0,330.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-09_00-37-27
  done: false
  episode_len_mean: 330.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 897
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.741285385025872
          entropy_coeff: 0.009999999999999998
          kl: 0.01125710559117999
          policy_loss: -0.06372327343043353
          total_loss: -0.07190193854686287
          vf_explained_var: -0.7243521213531494
          vf_loss: 0.0006858261813047445
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  iterations_since_restore: 330
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,330,7996.79,330000,0,0,0,330.64




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-09_00-38-08
  done: false
  episode_len_mean: 330.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 900
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9120015144348144
          entropy_coeff: 0.009999999999999998
          kl: 0.011730998501184725
          policy_loss: -0.0868447181665235
          total_loss: -0.09636261998158362
          vf_explained_var: -0.9919666051864624
          vf_loss: 0.0006938870171628272
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000
  iterations_since_restore: 331
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,331,8037.12,331000,0,0,0,330.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-09_00-38-34
  done: false
  episode_len_mean: 327.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 903
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6117473165194194
          entropy_coeff: 0.009999999999999998
          kl: 0.010191360873518976
          policy_loss: -0.0625984280059735
          total_loss: -0.07060879522727595
          vf_explained_var: -0.8372284770011902
          vf_loss: 0.0003680389227358521
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
  iterations_since_restore: 332
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,332,8063.25,332000,0,0,0,327.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-09_00-38-58
  done: false
  episode_len_mean: 324.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 906
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6860904826058283
          entropy_coeff: 0.009999999999999998
          kl: 0.011514585373643055
          policy_loss: -0.04839801277137465
          total_loss: -0.05563315475980441
          vf_explained_var: -0.5390676856040955
          vf_loss: 0.0008818746993913212
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  iterations_since_restore: 333
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,333,8086.96,333000,0,0,0,324.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-09_00-39-20
  done: false
  episode_len_mean: 325.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 909
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7171101729075113
          entropy_coeff: 0.009999999999999998
          kl: 0.017663754141468976
          policy_loss: -0.037123097562127644
          total_loss: -0.039933824208047655
          vf_explained_var: -0.2731582820415497
          vf_loss: 0.0009469598425008978
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  iterations_since_restore: 334
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,334,8108.97,334000,0,0,0,325.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-09_00-39-45
  done: false
  episode_len_mean: 325.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 912
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.753043500582377
          entropy_coeff: 0.009999999999999998
          kl: 0.013259610214655342
          policy_loss: -0.04431859478354454
          total_loss: -0.051099419966340064
          vf_explained_var: -0.6594972610473633
          vf_loss: 0.0006805964936372927
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  iterations_since_restore: 335
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,335,8134.01,335000,0,0,0,325.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-09_00-40-09
  done: false
  episode_len_mean: 324.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 915
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.673782049285041
          entropy_coeff: 0.009999999999999998
          kl: 0.007969862088364069
          policy_loss: -0.054984087041682665
          total_loss: -0.06528197611785597
          vf_explained_var: -0.6300413012504578
          vf_loss: 0.00038781787297921256
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  iterations_since_restore: 336
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,336,8158.61,336000,0,0,0,324.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-09_00-40-33
  done: false
  episode_len_mean: 324.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 918
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.751715358098348
          entropy_coeff: 0.009999999999999998
          kl: 0.00739310314895724
          policy_loss: -0.08110736873414781
          total_loss: -0.09229713680429591
          vf_explained_var: -0.5778748989105225
          vf_loss: 0.0007132479662282599
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  iterations_since_restore: 337
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,337,8182.55,337000,0,0,0,324.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-09_00-40-56
  done: false
  episode_len_mean: 324.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 921
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6443043337927925
          entropy_coeff: 0.009999999999999998
          kl: 0.010729170137178556
          policy_loss: -0.0892781403950519
          total_loss: -0.09692708976152871
          vf_explained_var: -0.8212478756904602
          vf_loss: 0.0006466312751096363
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  iterations_since_restore: 338
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,338,8205.58,338000,0,0,0,324.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-09_00-41-18
  done: false
  episode_len_mean: 325.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 924
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.83735036055247
          entropy_coeff: 0.009999999999999998
          kl: 0.00866187059347424
          policy_loss: -0.009837479951481024
          total_loss: -0.021089909670667515
          vf_explained_var: -1.0
          vf_loss: 0.0005434634503520404
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  iterations_since_restore: 339
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,339,8226.84,339000,0,0,0,325.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-09_00-41-41
  done: false
  episode_len_mean: 324.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 927
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.744812802473704
          entropy_coeff: 0.009999999999999998
          kl: 0.011118847339780184
          policy_loss: -0.03745098370644781
          total_loss: -0.04602974388334486
          vf_explained_var: -0.49656108021736145
          vf_loss: 0.00042599193224709275
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 340
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,340,8250.12,340000,0,0,0,324.53




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-09_00-42-21
  done: false
  episode_len_mean: 324.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 930
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6363880038261414
          entropy_coeff: 0.009999999999999998
          kl: 0.012354562001651498
          policy_loss: -0.0381750682161914
          total_loss: -0.04452861990365717
          vf_explained_var: -0.6525084376335144
          vf_loss: 0.0006285828625550493
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
  iterations_since_restore: 341
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,341,8290,341000,0,0,0,324.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-09_00-42-42
  done: false
  episode_len_mean: 323.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 932
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7349804507361517
          entropy_coeff: 0.009999999999999998
          kl: 0.008834103249317632
          policy_loss: -0.036738250735733245
          total_loss: -0.04697655791209804
          vf_explained_var: -0.776884913444519
          vf_loss: 0.00040310017082245193
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  iterations_since_restore: 342
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,342,8311.41,342000,0,0,0,323.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-09_00-43-03
  done: false
  episode_len_mean: 324.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 935
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7080966141488818
          entropy_coeff: 0.009999999999999998
          kl: 0.012031820042029375
          policy_loss: -0.09202103151215447
          total_loss: -0.0989081045716173
          vf_explained_var: -0.6542938947677612
          vf_loss: 0.0010572301741275522
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  iterations_since_restore: 343
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,343,8332.21,343000,0,0,0,324.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-09_00-43-28
  done: false
  episode_len_mean: 323.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 938
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7346137033568487
          entropy_coeff: 0.009999999999999998
          kl: 0.013492070215457888
          policy_loss: -0.04007200805677308
          total_loss: -0.04639951913721031
          vf_explained_var: -0.4822275936603546
          vf_loss: 0.0007730859409396847
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  iterations_since_restore: 344
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,344,8356.8,344000,0,0,0,323.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-09_00-43-49
  done: false
  episode_len_mean: 323.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 941
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.69814875125885
          entropy_coeff: 0.009999999999999998
          kl: 0.009676721302073983
          policy_loss: -0.10539844243062867
          total_loss: -0.11433870968305403
          vf_explained_var: -0.45691511034965515
          vf_loss: 0.0006929611509096706
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
  iterations_since_restore: 345
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,345,8378.61,345000,0,0,0,323.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-09_00-44-12
  done: false
  episode_len_mean: 322.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 944
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.836799110306634
          entropy_coeff: 0.009999999999999998
          kl: 0.01221547252669345
          policy_loss: -0.07244304488930438
          total_loss: -0.08099163547158242
          vf_explained_var: -0.624305248260498
          vf_loss: 0.0005432787331907699
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  iterations_since_restore: 346
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,346,8400.76,346000,0,0,0,322.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-09_00-44-36
  done: false
  episode_len_mean: 323.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 947
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7288427087995741
          entropy_coeff: 0.009999999999999998
          kl: 0.015322990124228377
          policy_loss: -0.014101866798268425
          total_loss: -0.01883046693272061
          vf_explained_var: -0.4423193335533142
          vf_loss: 0.0009239293942098609
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  iterations_since_restore: 347
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,347,8425.08,347000,0,0,0,323.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-09_00-44-58
  done: false
  episode_len_mean: 325.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 949
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.73333690961202
          entropy_coeff: 0.009999999999999998
          kl: 0.00975183234871546
          policy_loss: -0.07812106216119395
          total_loss: -0.0876632439179553
          vf_explained_var: -0.8911321759223938
          vf_loss: 0.0003858912695755458
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  iterations_since_restore: 348
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,348,8447.29,348000,0,0,0,325.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-09_00-45-18
  done: false
  episode_len_mean: 326.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 952
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7721512238184611
          entropy_coeff: 0.009999999999999998
          kl: 0.015069585096309308
          policy_loss: -0.04701653863820765
          total_loss: -0.05276502759920226
          vf_explained_var: -0.9943267703056335
          vf_loss: 0.0005295558159963953
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  iterations_since_restore: 349
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,349,8467.36,349000,0,0,0,326.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-09_00-45-42
  done: false
  episode_len_mean: 327.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 955
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7960313306914435
          entropy_coeff: 0.009999999999999998
          kl: 0.011110414890372351
          policy_loss: -0.0854033115837309
          total_loss: -0.09433536099063025
          vf_explained_var: -0.5156334638595581
          vf_loss: 0.0005912947827406849
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  iterations_since_restore: 350
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,350,8490.7,350000,0,0,0,327.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-09_00-46-05
  done: false
  episode_len_mean: 329.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 958
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6950039996041193
          entropy_coeff: 0.009999999999999998
          kl: 0.011442256786288851
          policy_loss: -0.058757794400056206
          total_loss: -0.06658552926447657
          vf_explained_var: -0.59684157371521
          vf_loss: 0.0004333417317765351
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  iterations_since_restore: 351
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,351,8514.48,351000,0,0,0,329.22




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-09_00-46-49
  done: false
  episode_len_mean: 330.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 961
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.575745603773329
          entropy_coeff: 0.009999999999999998
          kl: 0.011480593730689116
          policy_loss: -0.021634147771530682
          total_loss: -0.02830942196564542
          vf_explained_var: -0.5536864995956421
          vf_loss: 0.0003641061500982485
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  iterations_since_restore: 352
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,352,8558.34,352000,0,0,0,330.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-09_00-47-11
  done: false
  episode_len_mean: 332.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 964
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7193170070648194
          entropy_coeff: 0.009999999999999998
          kl: 0.01055927032819119
          policy_loss: -0.03748771775927809
          total_loss: -0.04598957730664147
          vf_explained_var: -0.8603107929229736
          vf_loss: 0.0006728620323378386
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  iterations_since_restore: 353
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,353,8579.93,353000,0,0,0,332.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-09_00-47-33
  done: false
  episode_len_mean: 333.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 967
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8046759578916762
          entropy_coeff: 0.009999999999999998
          kl: 0.00931165231680661
          policy_loss: -0.10456080254581239
          total_loss: -0.11512783004177941
          vf_explained_var: -0.9973012208938599
          vf_loss: 0.0004086956729426877
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  iterations_since_restore: 354
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,354,8602.43,354000,0,0,0,333.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-09_00-47-55
  done: false
  episode_len_mean: 334.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 969
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7800806085268657
          entropy_coeff: 0.009999999999999998
          kl: 0.012122624576356362
          policy_loss: -0.015159270001782311
          total_loss: -0.02315544341173437
          vf_explained_var: -1.0
          vf_loss: 0.000599016270522649
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
  iterations_since_restore: 355
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,355,8624.18,355000,0,0,0,334.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-09_00-48-17
  done: false
  episode_len_mean: 336.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 972
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5307522363132902
          entropy_coeff: 0.009999999999999998
          kl: 0.011693397463616308
          policy_loss: -0.05503876159588496
          total_loss: -0.0609578400850296
          vf_explained_var: -0.9974457621574402
          vf_loss: 0.0005087720175247847
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
  iterations_since_restore: 356
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,356,8646.16,356000,0,0,0,336.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-09_00-48-40
  done: false
  episode_len_mean: 338.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 975
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8214200072818332
          entropy_coeff: 0.009999999999999998
          kl: 0.01203268061484951
          policy_loss: -0.061578132812347676
          total_loss: -0.07007233469436566
          vf_explained_var: -0.7759602069854736
          vf_loss: 0.0005826839097102897
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  iterations_since_restore: 357
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,357,8668.53,357000,0,0,0,338.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-09_00-49-00
  done: false
  episode_len_mean: 341.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 977
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8618277192115784
          entropy_coeff: 0.009999999999999998
          kl: 0.01266538376622733
          policy_loss: -0.07042602675242557
          total_loss: -0.07889430774375797
          vf_explained_var: -0.6115565896034241
          vf_loss: 0.000532220763206068
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  iterations_since_restore: 358
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,358,8689.07,358000,0,0,0,341.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-09_00-49-21
  done: false
  episode_len_mean: 343.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 980
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8352068371242947
          entropy_coeff: 0.009999999999999998
          kl: 0.015151903730118111
          policy_loss: -0.08579554417067103
          total_loss: -0.09225240531894896
          vf_explained_var: -0.5272707343101501
          vf_loss: 0.00038923058924006506
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
  iterations_since_restore: 359
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,359,8709.77,359000,0,0,0,343.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-09_00-49-42
  done: false
  episode_len_mean: 345.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 983
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8076627797550626
          entropy_coeff: 0.009999999999999998
          kl: 0.009723825619661428
          policy_loss: -0.05766642543797692
          total_loss: -0.06791252497997549
          vf_explained_var: -1.0
          vf_loss: 0.00044649563238231675
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 360
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,360,8730.76,360000,0,0,0,345.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-09_00-50-06
  done: false
  episode_len_mean: 346.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 986
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5155977567036947
          entropy_coeff: 0.009999999999999998
          kl: 0.010603028497009268
          policy_loss: -0.08322821019424333
          total_loss: -0.08983756916390526
          vf_explained_var: -0.175285205245018
          vf_loss: 0.000494942867792108
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  iterations_since_restore: 361
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,361,8754.43,361000,0,0,0,346.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-09_00-50-30
  done: false
  episode_len_mean: 346.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 989
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4748159037695991
          entropy_coeff: 0.009999999999999998
          kl: 0.010427757424444047
          policy_loss: -0.08668465544987056
          total_loss: -0.09301220940219032
          vf_explained_var: -0.1289977878332138
          vf_loss: 0.0005020236651439013
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  iterations_since_restore: 362
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,362,8778.34,362000,0,0,0,346.88




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-09_00-51-09
  done: false
  episode_len_mean: 347.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 991
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6296706159909566
          entropy_coeff: 0.009999999999999998
          kl: 0.008356188238313559
          policy_loss: 0.01574796260231071
          total_loss: 0.006278212761713399
          vf_explained_var: -0.5117955803871155
          vf_loss: 0.00048147437984072085
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  iterations_since_restore: 363
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,363,8817.93,363000,0,0,0,347.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-09_00-51-29
  done: false
  episode_len_mean: 350.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 994
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8212205661667717
          entropy_coeff: 0.009999999999999998
          kl: 0.013659603646917755
          policy_loss: -0.09128384581870502
          total_loss: -0.09863607701328066
          vf_explained_var: -0.9884932041168213
          vf_loss: 0.00048721115777475966
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  iterations_since_restore: 364
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,364,8838.14,364000,0,0,0,350.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-09_00-51-50
  done: false
  episode_len_mean: 351.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 997
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.73018014298545
          entropy_coeff: 0.009999999999999998
          kl: 0.01277146373856233
          policy_loss: -0.06981271604696909
          total_loss: -0.07696317264603245
          vf_explained_var: -0.7808289527893066
          vf_loss: 0.0004530138213239196
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  iterations_since_restore: 365
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,365,8859.04,365000,0,0,0,351.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-09_00-52-12
  done: false
  episode_len_mean: 352.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 999
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7237138589223227
          entropy_coeff: 0.009999999999999998
          kl: 0.01243726283426096
          policy_loss: -0.03560391771089699
          total_loss: -0.042569134953535266
          vf_explained_var: -0.43241438269615173
          vf_loss: 0.000827377890689402
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  iterations_since_restore: 366
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,366,8881.14,366000,0,0,0,352.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-09_00-52-33
  done: false
  episode_len_mean: 354.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1002
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.678423117266761
          entropy_coeff: 0.009999999999999998
          kl: 0.011477419380783062
          policy_loss: -0.06808777215580146
          total_loss: -0.07557030509536465
          vf_explained_var: -0.3902130126953125
          vf_loss: 0.0005860321902825187
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
  iterations_since_restore: 367
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,367,8902.17,367000,0,0,0,354.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-09_00-52-58
  done: false
  episode_len_mean: 355.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1005
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7180765681796604
          entropy_coeff: 0.009999999999999998
          kl: 0.010291703826281287
          policy_loss: -0.09363463293347093
          total_loss: -0.10260981234411398
          vf_explained_var: -0.6288221478462219
          vf_loss: 0.00039032417116686703
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  iterations_since_restore: 368
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,368,8926.25,368000,0,0,0,355.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-09_00-53-20
  done: false
  episode_len_mean: 354.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1008
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6635227693451775
          entropy_coeff: 0.009999999999999998
          kl: 0.011972495091219079
          policy_loss: -0.06451929184711641
          total_loss: -0.07164353094995021
          vf_explained_var: -1.0
          vf_loss: 0.00041937446221709254
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  iterations_since_restore: 369
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,369,8948.62,369000,0,0,0,354.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-09_00-53-43
  done: false
  episode_len_mean: 355.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1010
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8082424362500509
          entropy_coeff: 0.009999999999999998
          kl: 0.012073278925936945
          policy_loss: -0.06015591613120503
          total_loss: -0.06865557676388158
          vf_explained_var: -0.9083936810493469
          vf_loss: 0.0004146193529272245
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  iterations_since_restore: 370
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,370,8972.15,370000,0,0,0,355.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-09_00-54-05
  done: false
  episode_len_mean: 356.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1013
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.761092107825809
          entropy_coeff: 0.009999999999999998
          kl: 0.01603532993356738
          policy_loss: -0.041471842671227124
          total_loss: -0.046291871793154214
          vf_explained_var: -0.5976178050041199
          vf_loss: 0.0006140615351291166
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000
  iterations_since_restore: 371
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,371,8993.77,371000,0,0,0,356.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-09_00-54-27
  done: false
  episode_len_mean: 357.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1016
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7859682851367527
          entropy_coeff: 0.009999999999999998
          kl: 0.01123955474551575
          policy_loss: -0.09445367939770222
          total_loss: -0.10347306958089272
          vf_explained_var: -0.998016893863678
          vf_loss: 0.00030525562857898573
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  iterations_since_restore: 372
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,372,9015.16,372000,0,0,0,357.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-09_00-54-49
  done: false
  episode_len_mean: 358.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1019
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7960466199451022
          entropy_coeff: 0.009999999999999998
          kl: 0.012123696008794364
          policy_loss: -0.06620641267961926
          total_loss: -0.07461443607591921
          vf_explained_var: -1.0
          vf_loss: 0.00034601129817828124
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
  iterations_since_restore: 373
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,373,9038.08,373000,0,0,0,358.49




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-09_00-55-28
  done: false
  episode_len_mean: 358.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1021
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6878671990500556
          entropy_coeff: 0.009999999999999998
          kl: 0.011578652178696953
          policy_loss: -0.09312105728313327
          total_loss: -0.10059370023922788
          vf_explained_var: -0.832356333732605
          vf_loss: 0.0006134897189137216
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  iterations_since_restore: 374
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,374,9077.08,374000,0,0,0,358.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-09_00-55-51
  done: false
  episode_len_mean: 359.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1024
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.791687990559472
          entropy_coeff: 0.009999999999999998
          kl: 0.010470734094782665
          policy_loss: -0.10405726966758569
          total_loss: -0.11365177006357247
          vf_explained_var: -0.9597432613372803
          vf_loss: 0.00037116645883846405
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  iterations_since_restore: 375
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,375,9099.1,375000,0,0,0,359.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-09_00-56-11
  done: false
  episode_len_mean: 359.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1026
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6823504699601068
          entropy_coeff: 0.009999999999999998
          kl: 0.010895162407246377
          policy_loss: 0.011515515256259176
          total_loss: 0.004150865889257855
          vf_explained_var: -0.15900911390781403
          vf_loss: 0.0011853434514099111
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  iterations_since_restore: 376
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,376,9119.55,376000,0,0,0,359.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-09_00-56-34
  done: false
  episode_len_mean: 361.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1029
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.700077059533861
          entropy_coeff: 0.009999999999999998
          kl: 0.012008338790938131
          policy_loss: -0.05347761913306183
          total_loss: -0.060909581774224834
          vf_explained_var: -0.743622899055481
          vf_loss: 0.00044997625906641284
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
  iterations_since_restore: 377
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,377,9142.5,377000,0,0,0,361.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-09_00-56-59
  done: false
  episode_len_mean: 360.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1032
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4622274054421318
          entropy_coeff: 0.009999999999999998
          kl: 0.012486528402284364
          policy_loss: -0.08726635802951124
          total_loss: -0.09170834438668357
          vf_explained_var: -0.12305673956871033
          vf_loss: 0.0006983297760598361
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  iterations_since_restore: 378
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,378,9167.46,378000,0,0,0,360.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-09_00-57-24
  done: false
  episode_len_mean: 358.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1036
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6533175415462917
          entropy_coeff: 0.009999999999999998
          kl: 0.01233775574898317
          policy_loss: -0.064755506399605
          total_loss: -0.07140387321511904
          vf_explained_var: 0.06248040124773979
          vf_loss: 0.0005158257958505095
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  iterations_since_restore: 379
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,379,9192.08,379000,0,0,0,358.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-09_00-57-47
  done: false
  episode_len_mean: 359.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1038
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6207076496548123
          entropy_coeff: 0.009999999999999998
          kl: 0.012510018493384381
          policy_loss: -0.07492510301785336
          total_loss: -0.08126180875632498
          vf_explained_var: -0.7182639241218567
          vf_loss: 0.00037057270674267784
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  iterations_since_restore: 380
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,380,9215.45,380000,0,0,0,359.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-09_00-58-12
  done: false
  episode_len_mean: 358.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1041
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.681047785282135
          entropy_coeff: 0.009999999999999998
          kl: 0.011899630576746133
          policy_loss: -0.07035093365444077
          total_loss: -0.07764029618766573
          vf_explained_var: -1.0
          vf_loss: 0.0004848332171806962
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  iterations_since_restore: 381
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,381,9239.96,381000,0,0,0,358.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-09_00-58-36
  done: false
  episode_len_mean: 357.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1045
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7016720016797384
          entropy_coeff: 0.009999999999999998
          kl: 0.013400319268698939
          policy_loss: -0.13142838570703236
          total_loss: -0.1376671197461999
          vf_explained_var: -0.873178243637085
          vf_loss: 0.0006021147377194009
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  iterations_since_restore: 382
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,382,9264.78,382000,0,0,0,357.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-09_00-59-03
  done: false
  episode_len_mean: 355.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1048
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.2796082231733534
          entropy_coeff: 0.009999999999999998
          kl: 0.012325523478180974
          policy_loss: -0.03255223267608219
          total_loss: -0.035648780398898655
          vf_explained_var: -0.5385023355484009
          vf_loss: 0.0003398390915309493
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  iterations_since_restore: 383
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,383,9291.44,383000,0,0,0,355.93




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-09_00-59-44
  done: false
  episode_len_mean: 352.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1051
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.255895041094886
          entropy_coeff: 0.009999999999999998
          kl: 0.017750360216390884
          policy_loss: -0.10705148122376866
          total_loss: -0.10494938095410665
          vf_explained_var: 0.06855324655771255
          vf_loss: 0.0011818688036227185
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
  iterations_since_restore: 384
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,384,9332.88,384000,0,0,0,352.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-09_01-00-08
  done: false
  episode_len_mean: 352.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1054
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.53382216029697
          entropy_coeff: 0.009999999999999998
          kl: 0.01204707584885626
          policy_loss: -0.06703941310859389
          total_loss: -0.07284588414347834
          vf_explained_var: -0.8748816847801208
          vf_loss: 0.0003835026870041879
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  iterations_since_restore: 385
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,385,9356.18,385000,0,0,0,352.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-09_01-00-30
  done: false
  episode_len_mean: 353.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1057
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4466111712985568
          entropy_coeff: 0.009999999999999998
          kl: 0.016781890378089196
          policy_loss: 0.00013869388235939874
          total_loss: -0.0003596345583597819
          vf_explained_var: 0.4514227509498596
          vf_loss: 0.0012240356138338232
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  iterations_since_restore: 386
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,386,9378.46,386000,0,0,0,353.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-09_01-00-54
  done: false
  episode_len_mean: 354.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1060
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5308585988150702
          entropy_coeff: 0.009999999999999998
          kl: 0.013680241926602053
          policy_loss: -0.046185879564533634
          total_loss: -0.050590037575198546
          vf_explained_var: -0.9719901084899902
          vf_loss: 0.000515993889388887
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  iterations_since_restore: 387
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,387,9401.94,387000,0,0,0,354.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-09_01-01-14
  done: false
  episode_len_mean: 354.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1062
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.448099050256941
          entropy_coeff: 0.009999999999999998
          kl: 0.007488091800662744
          policy_loss: -0.08967871655606562
          total_loss: -0.09800194539129733
          vf_explained_var: -1.0
          vf_loss: 0.0004714900158837231
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  iterations_since_restore: 388
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,388,9421.91,388000,0,0,0,354.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-09_01-01-35
  done: false
  episode_len_mean: 355.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1065
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7151077071825662
          entropy_coeff: 0.009999999999999998
          kl: 0.011626549621487786
          policy_loss: -0.10244066416182451
          total_loss: -0.11030472233477566
          vf_explained_var: -0.6499803066253662
          vf_loss: 0.0004581092701603969
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  iterations_since_restore: 389
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,389,9442.85,389000,0,0,0,355.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-09_01-01-59
  done: false
  episode_len_mean: 354.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1068
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3665859169430203
          entropy_coeff: 0.009999999999999998
          kl: 0.010952240225512297
          policy_loss: -0.016213794818354978
          total_loss: -0.021131810587313442
          vf_explained_var: -0.262382447719574
          vf_loss: 0.00043098404841859724
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  iterations_since_restore: 390
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,390,9466.89,390000,0,0,0,354.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-09_01-02-21
  done: false
  episode_len_mean: 354.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1071
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5498123327891031
          entropy_coeff: 0.009999999999999998
          kl: 0.011677657554733992
          policy_loss: -0.06846254629393418
          total_loss: -0.07446098818133275
          vf_explained_var: -0.5053842663764954
          vf_loss: 0.0006319571975230549
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  iterations_since_restore: 391
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,391,9489.73,391000,0,0,0,354.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-09_01-02-46
  done: false
  episode_len_mean: 353.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1074
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5426768832736546
          entropy_coeff: 0.009999999999999998
          kl: 0.0077617210181700836
          policy_loss: 0.0010353424068954256
          total_loss: -0.00796370313813289
          vf_explained_var: -0.4314973056316376
          vf_loss: 0.0005336655080706502
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  iterations_since_restore: 392
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,392,9513.97,392000,0,0,0,353.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-09_01-03-13
  done: false
  episode_len_mean: 351.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1077
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.454800260066986
          entropy_coeff: 0.009999999999999998
          kl: 0.010100959632996107
          policy_loss: -0.07153395189800196
          total_loss: -0.07807138576689694
          vf_explained_var: -0.9632542133331299
          vf_loss: 0.00034015210031712844
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  iterations_since_restore: 393
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,393,9541.07,393000,0,0,0,351.16




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-09_01-03-52
  done: false
  episode_len_mean: 349.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1080
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.696983888414171
          entropy_coeff: 0.009999999999999998
          kl: 0.012279494359792132
          policy_loss: -0.06571995144089063
          total_loss: -0.07298285677615139
          vf_explained_var: -0.85057532787323
          vf_loss: 0.00038219161910496444
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  iterations_since_restore: 394
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,394,9580.22,394000,0,0,0,349.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-09_01-04-16
  done: false
  episode_len_mean: 348.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1083
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.516602169142829
          entropy_coeff: 0.009999999999999998
          kl: 0.01556561035007626
          policy_loss: -0.09600029148989253
          total_loss: -0.09900460516413052
          vf_explained_var: -0.6423908472061157
          vf_loss: 0.00034157640604664467
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  iterations_since_restore: 395
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,395,9604.29,395000,0,0,0,348.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-09_01-04-38
  done: false
  episode_len_mean: 348.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1086
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6532245212131076
          entropy_coeff: 0.009999999999999998
          kl: 0.011139588593854001
          policy_loss: -0.0380987507601579
          total_loss: -0.045660722193618615
          vf_explained_var: -0.46351391077041626
          vf_loss: 0.0005111498147016391
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  iterations_since_restore: 396
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,396,9626.59,396000,0,0,0,348.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-09_01-05-00
  done: false
  episode_len_mean: 350.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1089
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.766854539182451
          entropy_coeff: 0.009999999999999998
          kl: 0.013702064610830848
          policy_loss: -0.08475838028308418
          total_loss: -0.09161324747320679
          vf_explained_var: -0.8595046997070312
          vf_loss: 0.0004086743351460124
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  iterations_since_restore: 397
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,397,9647.66,397000,0,0,0,350.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-09_01-05-23
  done: false
  episode_len_mean: 350.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1091
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4978354533513387
          entropy_coeff: 0.009999999999999998
          kl: 0.009890536768719194
          policy_loss: -0.029544553409020107
          total_loss: -0.03665593701104323
          vf_explained_var: -0.8915939331054688
          vf_loss: 0.00035634223556068413
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
  iterations_since_restore: 398
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,398,9670.86,398000,0,0,0,350.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-09_01-05-43
  done: false
  episode_len_mean: 349.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1094
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7222117834621005
          entropy_coeff: 0.009999999999999998
          kl: 0.013698554861202854
          policy_loss: -0.04341072208351559
          total_loss: -0.04985801925261815
          vf_explained_var: -0.5978087782859802
          vf_loss: 0.00037248238287348714
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  iterations_since_restore: 399
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,399,9691.25,399000,0,0,0,349.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-09_01-06-03
  done: false
  episode_len_mean: 349.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1096
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6632076899210613
          entropy_coeff: 0.009999999999999998
          kl: 0.00781441485946539
          policy_loss: -0.01617718297574255
          total_loss: -0.02654648742948969
          vf_explained_var: -0.9441815614700317
          vf_loss: 0.0003287008273117762
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  iterations_since_restore: 400
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,400,9710.75,400000,0,0,0,349.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-09_01-06-23
  done: false
  episode_len_mean: 351.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1099
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4034625457392798
          entropy_coeff: 0.009999999999999998
          kl: 0.00856039499913754
          policy_loss: -0.09531312924292352
          total_loss: -0.10067852445774608
          vf_explained_var: -0.26530125737190247
          vf_loss: 0.0021686863682892485
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  iterations_since_restore: 401
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,401,9730.96,401000,0,0,0,351.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-09_01-06-46
  done: false
  episode_len_mean: 350.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1102
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6745148870680067
          entropy_coeff: 0.009999999999999998
          kl: 0.011814672060437993
          policy_loss: -0.07310657277703285
          total_loss: -0.08056011699760954
          vf_explained_var: -1.0
          vf_loss: 0.00031983929818832417
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  iterations_since_restore: 402
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,402,9753.72,402000,0,0,0,350.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-09_01-07-08
  done: false
  episode_len_mean: 351.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1104
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7071098619037204
          entropy_coeff: 0.009999999999999998
          kl: 0.012032245095261414
          policy_loss: -0.06978811543020937
          total_loss: -0.07739871305723985
          vf_explained_var: -1.0
          vf_loss: 0.0003235147950666336
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iterations_since_restore: 403
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,403,9775.71,403000,0,0,0,351.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-09_01-07-32
  done: false
  episode_len_mean: 351.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1107
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.456667160987854
          entropy_coeff: 0.009999999999999998
          kl: 0.008327713310800474
          policy_loss: -0.10542078129947186
          total_loss: -0.11339499672046965
          vf_explained_var: -0.8923378586769104
          vf_loss: 0.00026859696986826346
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iterations_since_restore: 404
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,404,9799.59,404000,0,0,0,351.23




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-09_01-08-11
  done: false
  episode_len_mean: 350.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1110
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8018207748730977
          entropy_coeff: 0.009999999999999998
          kl: 0.01091117152268431
          policy_loss: -0.010010130951801937
          total_loss: -0.01950369303425153
          vf_explained_var: -0.7640705704689026
          vf_loss: 0.00023897069089192277
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iterations_since_restore: 405
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,405,9838.87,405000,0,0,0,350.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-09_01-08-37
  done: false
  episode_len_mean: 348.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1113
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.1910522613260481
          entropy_coeff: 0.009999999999999998
          kl: 0.00897738821904486
          policy_loss: -0.0832027497390906
          total_loss: -0.08757615743411912
          vf_explained_var: 0.10689389705657959
          vf_loss: 0.0007199090035606382
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  iterations_since_restore: 406
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,406,9865.38,406000,0,0,0,348.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-09_01-08-58
  done: false
  episode_len_mean: 348.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1116
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6424517922931248
          entropy_coeff: 0.009999999999999998
          kl: 0.011719412164258359
          policy_loss: -0.09446412515309122
          total_loss: -0.10156371721790897
          vf_explained_var: -0.9734299778938293
          vf_loss: 0.0004254968671981866
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  iterations_since_restore: 407
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,407,9886.12,407000,0,0,0,348.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-09_01-09-20
  done: false
  episode_len_mean: 349.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1119
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7188290490044489
          entropy_coeff: 0.009999999999999998
          kl: 0.010244375715650575
          policy_loss: -0.056291627801126905
          total_loss: -0.06529353625244565
          vf_explained_var: -0.9305744767189026
          vf_loss: 0.00040706103577071594
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  iterations_since_restore: 408
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,408,9907.98,408000,0,0,0,349.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-09_01-09-42
  done: false
  episode_len_mean: 349.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1122
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6445233424504597
          entropy_coeff: 0.009999999999999998
          kl: 0.012110414680082233
          policy_loss: -0.09782530143857002
          total_loss: -0.10471153325504727
          vf_explained_var: -0.43328985571861267
          vf_loss: 0.00036265652840180944
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iterations_since_restore: 409
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,409,9930.13,409000,0,0,0,349.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-09_01-10-03
  done: false
  episode_len_mean: 349.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1124
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6225998997688293
          entropy_coeff: 0.009999999999999998
          kl: 0.009285501909438111
          policy_loss: -0.08133347390426529
          total_loss: -0.09023456478284465
          vf_explained_var: -0.9793773889541626
          vf_loss: 0.00027373051109154606
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  iterations_since_restore: 410
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,410,9951.13,410000,0,0,0,349


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-09_01-10-25
  done: false
  episode_len_mean: 348.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1127
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5804623206456503
          entropy_coeff: 0.009999999999999998
          kl: 0.01459372159669943
          policy_loss: -0.07423058129433129
          total_loss: -0.07838651044294238
          vf_explained_var: -0.8405075669288635
          vf_loss: 0.0005665850867646643
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  iterations_since_restore: 411
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,411,9972.4,411000,0,0,0,348.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-09_01-10-50
  done: false
  episode_len_mean: 347.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1130
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.2139681842592027
          entropy_coeff: 0.009999999999999998
          kl: 0.01364624112242447
          policy_loss: -0.015266655012965202
          total_loss: -0.01668723995486895
          vf_explained_var: 0.40237095952033997
          vf_loss: 0.0003564822086546984
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  iterations_since_restore: 412
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,412,9997.49,412000,0,0,0,347.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-09_01-11-11
  done: false
  episode_len_mean: 348.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1133
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.604100133313073
          entropy_coeff: 0.009999999999999998
          kl: 0.013145541426424283
          policy_loss: -0.11247250581574109
          total_loss: -0.11802586248765388
          vf_explained_var: -0.7822988629341125
          vf_loss: 0.000505250362200766
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  iterations_since_restore: 413
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,413,10018.7,413000,0,0,0,348.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-09_01-11-33
  done: false
  episode_len_mean: 349.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1136
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.338834775818719
          entropy_coeff: 0.009999999999999998
          kl: 0.010576994760017852
          policy_loss: -0.021686787241035038
          total_loss: -0.026492014775673547
          vf_explained_var: -0.18448813259601593
          vf_loss: 0.0005512164634031554
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  iterations_since_restore: 414
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,414,10040.9,414000,0,0,0,349.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-09_01-11-56
  done: false
  episode_len_mean: 349.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1138
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4195037914646997
          entropy_coeff: 0.009999999999999998
          kl: 0.009379999023165593
          policy_loss: -0.00947488055874904
          total_loss: -0.01631818806959523
          vf_explained_var: -0.6543763279914856
          vf_loss: 0.00022879320636598602
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
  iterations_since_restore: 415
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,415,10063.8,415000,0,0,0,349.96




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-09_01-12-35
  done: false
  episode_len_mean: 350.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1141
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6368574579556783
          entropy_coeff: 0.009999999999999998
          kl: 0.007507962258595925
          policy_loss: -0.07940945647036037
          total_loss: -0.08982602919762334
          vf_explained_var: -0.6751846671104431
          vf_loss: 0.0002506428010140856
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  iterations_since_restore: 416
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,416,10102.5,416000,0,0,0,350.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-09_01-12-53
  done: false
  episode_len_mean: 352.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1143
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3166107171111636
          entropy_coeff: 0.009999999999999998
          kl: 0.01697956913087185
          policy_loss: -0.05063836293088065
          total_loss: -0.05007023099395964
          vf_explained_var: -0.8143810629844666
          vf_loss: 0.0008403791738803395
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
  iterations_since_restore: 417
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,417,10121.1,417000,0,0,0,352.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-09_01-13-17
  done: false
  episode_len_mean: 354.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1146
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4205565161175198
          entropy_coeff: 0.009999999999999998
          kl: 0.014249169189432268
          policy_loss: -0.057568904384970664
          total_loss: -0.06058344758219189
          vf_explained_var: -0.6912520527839661
          vf_loss: 0.00037056044950279304
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
  iterations_since_restore: 418
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,418,10144.4,418000,0,0,0,354.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-09_01-13-38
  done: false
  episode_len_mean: 356.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1149
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6573582265112135
          entropy_coeff: 0.009999999999999998
          kl: 0.013758283568555222
          policy_loss: -0.12665603566500877
          total_loss: -0.1324387635000878
          vf_explained_var: -0.7449488043785095
          vf_loss: 0.00034315750171016486
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  iterations_since_restore: 419
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,419,10165.2,419000,0,0,0,356.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-09_01-13-58
  done: false
  episode_len_mean: 358.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1151
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6551056252585516
          entropy_coeff: 0.009999999999999998
          kl: 0.012068254690215063
          policy_loss: -0.057991571724414825
          total_loss: -0.06496520610526205
          vf_explained_var: -0.9863010048866272
          vf_loss: 0.0004130936517160282
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
  iterations_since_restore: 420
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,420,10185.2,420000,0,0,0,358.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-09_01-14-21
  done: false
  episode_len_mean: 359.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1154
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5987773868772719
          entropy_coeff: 0.009999999999999998
          kl: 0.010790080584707675
          policy_loss: -0.10412031834324201
          total_loss: -0.11143597207135625
          vf_explained_var: 0.032576244324445724
          vf_loss: 0.00047840261411490954
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
  iterations_since_restore: 421
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,421,10208.6,421000,0,0,0,359.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-09_01-14-44
  done: false
  episode_len_mean: 359.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1157
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5169229904810588
          entropy_coeff: 0.009999999999999998
          kl: 0.010905000775978498
          policy_loss: -0.04362946709411012
          total_loss: -0.05013547224096126
          vf_explained_var: 0.0155451949685812
          vf_loss: 0.00038223999549194965
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
  iterations_since_restore: 422
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,422,10231.7,422000,0,0,0,359.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-09_01-15-10
  done: false
  episode_len_mean: 358.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1160
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.448355289300283
          entropy_coeff: 0.009999999999999998
          kl: 0.00905346218857882
          policy_loss: -0.06712066899571154
          total_loss: -0.07431620121416119
          vf_explained_var: -0.8137224912643433
          vf_loss: 0.00041304611465117584
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  iterations_since_restore: 423
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,423,10257.4,423000,0,0,0,358.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-09_01-15-30
  done: false
  episode_len_mean: 357.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1163
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3917763021257188
          entropy_coeff: 0.009999999999999998
          kl: 0.011801511878767773
          policy_loss: -0.10158270026246706
          total_loss: -0.10618737012975746
          vf_explained_var: -0.5923264026641846
          vf_loss: 0.00035131963183327266
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  iterations_since_restore: 424
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,424,10277.9,424000,0,0,0,357.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-09_01-15-58
  done: false
  episode_len_mean: 356.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1166
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3245310491985745
          entropy_coeff: 0.009999999999999998
          kl: 0.014180429121008744
          policy_loss: -0.1431885993315114
          total_loss: -0.1452054305622975
          vf_explained_var: -0.45107176899909973
          vf_loss: 0.0004602172434614557
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  iterations_since_restore: 425
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,425,10305.6,425000,0,0,0,356.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-09_01-16-24
  done: false
  episode_len_mean: 355.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1169
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.2722077356444466
          entropy_coeff: 0.009999999999999998
          kl: 0.014986999488259854
          policy_loss: -0.1120293453335762
          total_loss: -0.11308047523101171
          vf_explained_var: -0.3004581034183502
          vf_loss: 0.0002901954579606859
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000
  iterations_since_restore: 426
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,426,10331.9,426000,0,0,0,355.03




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-09_01-17-02
  done: false
  episode_len_mean: 357.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1172
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5383198738098145
          entropy_coeff: 0.009999999999999998
          kl: 0.010008816288519955
          policy_loss: -0.008283244859841134
          total_loss: -0.015704054633776346
          vf_explained_var: -1.0
          vf_loss: 0.000361945242588667
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
  iterations_since_restore: 427
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,427,10369.3,427000,0,0,0,357.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-09_01-17-20
  done: false
  episode_len_mean: 359.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1174
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3636109054088592
          entropy_coeff: 0.009999999999999998
          kl: 0.011054318465893016
          policy_loss: -0.01435720030632284
          total_loss: -0.01930459523573518
          vf_explained_var: -0.7652289271354675
          vf_loss: 0.00029434036785258084
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  iterations_since_restore: 428
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,428,10388,428000,0,0,0,359.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-09_01-17-44
  done: false
  episode_len_mean: 361.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1177
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.529218884309133
          entropy_coeff: 0.009999999999999998
          kl: 0.013758586929029882
          policy_loss: -0.12229889055920971
          total_loss: -0.1266963997648822
          vf_explained_var: -0.1489003598690033
          vf_loss: 0.000446754146024533
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
  iterations_since_restore: 429
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,429,10411.9,429000,0,0,0,361.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-09_01-18-05
  done: false
  episode_len_mean: 361.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1179
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6416551245583428
          entropy_coeff: 0.009999999999999998
          kl: 0.012538010913967114
          policy_loss: 0.005866515160434776
          total_loss: -0.000624420266184542
          vf_explained_var: -0.49200817942619324
          vf_loss: 0.0004045638624423494
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000
  iterations_since_restore: 430
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,430,10432.7,430000,0,0,0,361.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-09_01-18-24
  done: false
  episode_len_mean: 365.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1182
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.414483869075775
          entropy_coeff: 0.009999999999999998
          kl: 0.014230496787924431
          policy_loss: -0.006280136553363668
          total_loss: -0.009300212086074882
          vf_explained_var: -0.9993661046028137
          vf_loss: 0.0003184787702694949
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000
  iterations_since_restore: 431
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,431,10451.2,431000,0,0,0,365.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-09_01-18-44
  done: false
  episode_len_mean: 366.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1184
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8010827011532253
          entropy_coeff: 0.009999999999999998
          kl: 0.011704651173528171
          policy_loss: -0.05581827937728829
          total_loss: -0.06464484590623114
          vf_explained_var: -0.9110115170478821
          vf_loss: 0.0002960423450632435
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
  iterations_since_restore: 432
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,432,10471.6,432000,0,0,0,366.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-09_01-19-04
  done: false
  episode_len_mean: 367.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1186
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5325515389442443
          entropy_coeff: 0.009999999999999998
          kl: 0.012147704218264721
          policy_loss: -0.10608554573522674
          total_loss: -0.11193967900342411
          vf_explained_var: -0.7185462713241577
          vf_loss: 0.000246720101373891
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000
  iterations_since_restore: 433
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,433,10490.9,433000,0,0,0,367.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-09_01-19-27
  done: false
  episode_len_mean: 367.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1189
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6496758341789246
          entropy_coeff: 0.009999999999999998
          kl: 0.013972587472152595
          policy_loss: -0.07198812257912424
          total_loss: -0.07758483294811513
          vf_explained_var: -0.9388554096221924
          vf_loss: 0.0002896147852879949
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  iterations_since_restore: 434
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,434,10514.5,434000,0,0,0,367.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-09_01-19-48
  done: false
  episode_len_mean: 368.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1192
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6859122501479256
          entropy_coeff: 0.009999999999999998
          kl: 0.01290918637758731
          policy_loss: -0.031165118598275716
          total_loss: -0.03786976227743758
          vf_explained_var: -1.0
          vf_loss: 0.00035156704074729026
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
  iterations_since_restore: 435
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,435,10535.3,435000,0,0,0,368.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-09_01-20-04
  done: false
  episode_len_mean: 370.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1194
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6829677793714735
          entropy_coeff: 0.009999999999999998
          kl: 0.01317744335428325
          policy_loss: -0.10360813682071036
          total_loss: -0.1097111420912875
          vf_explained_var: -0.8636016845703125
          vf_loss: 0.0007200514422341561
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  iterations_since_restore: 436
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,436,10551.6,436000,0,0,0,370.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-09_01-20-25
  done: false
  episode_len_mean: 371.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1196
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3390234059757657
          entropy_coeff: 0.009999999999999998
          kl: 0.011573686602784322
          policy_loss: -0.07995192325777478
          total_loss: -0.08405318264332083
          vf_explained_var: -0.7470179200172424
          vf_loss: 0.0005002057674573735
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  iterations_since_restore: 437
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,437,10572.6,437000,0,0,0,371.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-09_01-20-45
  done: false
  episode_len_mean: 371.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1199
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5917388478914896
          entropy_coeff: 0.009999999999999998
          kl: 0.010677562165442033
          policy_loss: -0.04917684963179959
          total_loss: -0.056718865326709215
          vf_explained_var: -0.9594208598136902
          vf_loss: 0.0002670945919817314
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000
  iterations_since_restore: 438
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,438,10591.9,438000,0,0,0,371.11




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-09_01-21-22
  done: false
  episode_len_mean: 371.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1201
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.543713268968794
          entropy_coeff: 0.009999999999999998
          kl: 0.010948789603582062
          policy_loss: -0.013907052824894588
          total_loss: -0.020740175164408153
          vf_explained_var: -0.9996598362922668
          vf_loss: 0.0002897719999762355
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  iterations_since_restore: 439
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,439,10629.4,439000,0,0,0,371.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-09_01-21-43
  done: false
  episode_len_mean: 372.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1204
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5826983955171374
          entropy_coeff: 0.009999999999999998
          kl: 0.014188505388932605
          policy_loss: -0.05007859083513419
          total_loss: -0.05479908457232846
          vf_explained_var: -0.30572509765625
          vf_loss: 0.00033209501933823855
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  iterations_since_restore: 440
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,440,10650.5,440000,0,0,0,372.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-09_01-22-02
  done: false
  episode_len_mean: 374.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1206
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7485674593183729
          entropy_coeff: 0.009999999999999998
          kl: 0.011426482644766702
          policy_loss: -0.03155267735322317
          total_loss: -0.040167871200376085
          vf_explained_var: -1.0
          vf_loss: 0.00019349699844800245
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  iterations_since_restore: 441
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,441,10668.8,441000,0,0,0,374.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-09_01-22-19
  done: false
  episode_len_mean: 377.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1208
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6810108145078024
          entropy_coeff: 0.009999999999999998
          kl: 0.009962634881299278
          policy_loss: -0.12665090689228642
          total_loss: -0.1356368586421013
          vf_explained_var: -0.5220972895622253
          vf_loss: 0.0002587803181894641
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
  iterations_since_restore: 442
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,442,10686.2,442000,0,0,0,377.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-09_01-22-41
  done: false
  episode_len_mean: 378.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1211
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6675899492369757
          entropy_coeff: 0.009999999999999998
          kl: 0.015336628000296493
          policy_loss: -0.0439594601177507
          total_loss: -0.048625533675981894
          vf_explained_var: -0.49124017357826233
          vf_loss: 0.00036357246822768096
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  iterations_since_restore: 443
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,443,10708,443000,0,0,0,378.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-09_01-23-04
  done: false
  episode_len_mean: 379.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1214
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5658528566360475
          entropy_coeff: 0.009999999999999998
          kl: 0.011138454929575579
          policy_loss: -0.03470700913005405
          total_loss: -0.041670764113465944
          vf_explained_var: -0.2306840717792511
          vf_loss: 0.00023650984415629256
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  iterations_since_restore: 444
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,444,10731,444000,0,0,0,379.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-09_01-23-23
  done: false
  episode_len_mean: 380.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1216
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5445683346854315
          entropy_coeff: 0.009999999999999998
          kl: 0.011567714102478218
          policy_loss: -0.07529463523791896
          total_loss: -0.08177933647400804
          vf_explained_var: -0.7505348920822144
          vf_loss: 0.0001767510426159586
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
  iterations_since_restore: 445
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,445,10750.4,445000,0,0,0,380.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-09_01-23-43
  done: false
  episode_len_mean: 381.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1218
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6586127003033957
          entropy_coeff: 0.009999999999999998
          kl: 0.011319249546327449
          policy_loss: -0.05957445396731297
          total_loss: -0.06725505162030458
          vf_explained_var: -0.9695767164230347
          vf_loss: 0.0003099770644944834
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  iterations_since_restore: 446
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,446,10769.8,446000,0,0,0,381.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-09_01-24-01
  done: false
  episode_len_mean: 383.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1220
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5172839257452222
          entropy_coeff: 0.009999999999999998
          kl: 0.014847540946840255
          policy_loss: -0.11090958979394701
          total_loss: -0.11438952796161175
          vf_explained_var: -0.45945531129837036
          vf_loss: 0.0004180489474998063
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  iterations_since_restore: 447
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,447,10788.5,447000,0,0,0,383.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-09_01-24-26
  done: false
  episode_len_mean: 382.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1224
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.705980783038669
          entropy_coeff: 0.009999999999999998
          kl: 0.018595373511910686
          policy_loss: -0.03230506512853834
          total_loss: -0.0349719967900051
          vf_explained_var: -0.28132349252700806
          vf_loss: 0.00027201882120506425
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  iterations_since_restore: 448
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,448,10812.9,448000,0,0,0,382.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-09_01-24-52
  done: false
  episode_len_mean: 380.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1227
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5047056012683444
          entropy_coeff: 0.009999999999999998
          kl: 0.010329677188050631
          policy_loss: 0.05928375977608893
          total_loss: 0.052299997645119826
          vf_explained_var: -0.5520382523536682
          vf_loss: 0.00021919270269184685
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  iterations_since_restore: 449
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,449,10839,449000,0,0,0,380.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-09_01-25-13
  done: false
  episode_len_mean: 382.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1229
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.655778588189019
          entropy_coeff: 0.009999999999999998
          kl: 0.013934648610793637
          policy_loss: -0.03259268833531274
          total_loss: -0.03838927774793572
          vf_explained_var: -0.231503427028656
          vf_loss: 0.0001795738605627169
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  iterations_since_restore: 450
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,450,10859.6,450000,0,0,0,382.56




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-09_01-25-50
  done: false
  episode_len_mean: 384.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1232
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5806458089086743
          entropy_coeff: 0.009999999999999998
          kl: 0.007957024663928811
          policy_loss: -0.03767323687465655
          total_loss: -0.04727641908037994
          vf_explained_var: -0.42949753999710083
          vf_loss: 0.00016091035108224282
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000
  iterations_since_restore: 451
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,451,10896.9,451000,0,0,0,384.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-09_01-26-11
  done: false
  episode_len_mean: 385.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1234
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7770541588465372
          entropy_coeff: 0.009999999999999998
          kl: 0.014454615522238356
          policy_loss: -0.06829913900130324
          total_loss: -0.07495559424989753
          vf_explained_var: -0.335163414478302
          vf_loss: 0.00013761335366224456
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  iterations_since_restore: 452
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,452,10917.6,452000,0,0,0,385.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-09_01-26-30
  done: false
  episode_len_mean: 385.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1237
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5088249815834893
          entropy_coeff: 0.009999999999999998
          kl: 0.012901607316201992
          policy_loss: -0.09310726005997923
          total_loss: -0.09804898218976127
          vf_explained_var: -0.5850997567176819
          vf_loss: 0.0003493699655842243
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000
  iterations_since_restore: 453
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,453,10937.2,453000,0,0,0,385.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-10-09_01-26-50
  done: false
  episode_len_mean: 387.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1239
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6489098164770337
          entropy_coeff: 0.009999999999999998
          kl: 0.01183694734300288
          policy_loss: -0.11051757896525992
          total_loss: -0.11784893274307251
          vf_explained_var: -0.9616017937660217
          vf_loss: 0.00016906237198984148
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  iterations_since_restore: 454
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,454,10957.3,454000,0,0,0,387.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-10-09_01-27-11
  done: false
  episode_len_mean: 387.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1242
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6843419697549609
          entropy_coeff: 0.009999999999999998
          kl: 0.012319184091699123
          policy_loss: 0.08910144021113714
          total_loss: 0.08178504123869869
          vf_explained_var: -0.741568922996521
          vf_loss: 0.0001721403416013345
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  iterations_since_restore: 455
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,455,10977.6,455000,0,0,0,387.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-10-09_01-27-30
  done: false
  episode_len_mean: 386.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1244
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.465329564942254
          entropy_coeff: 0.009999999999999998
          kl: 0.013376670809277473
          policy_loss: -0.04274429803093274
          total_loss: -0.04708609026339319
          vf_explained_var: -0.7569266557693481
          vf_loss: 0.0001535931658711181
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
  iterations_since_restore: 456
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,456,10997.4,456000,0,0,0,386.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-10-09_01-27-50
  done: false
  episode_len_mean: 389.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1247
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6362555424372356
          entropy_coeff: 0.009999999999999998
          kl: 0.013223541901013963
          policy_loss: -0.07452206268078751
          total_loss: -0.08070790804922581
          vf_explained_var: -0.938456654548645
          vf_loss: 0.00013508416288661668
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 457000
  iterations_since_restore: 457
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,457,11017.3,457000,0,0,0,389.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-10-09_01-28-12
  done: false
  episode_len_mean: 388.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1249
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7827820009655422
          entropy_coeff: 0.009999999999999998
          kl: 0.010971566718228519
          policy_loss: -0.06559340192211999
          total_loss: -0.07497021363427242
          vf_explained_var: -1.0
          vf_loss: 0.0001194762900316467
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000
  iterations_since_restore: 458
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,458,11039.2,458000,0,0,0,388.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-10-09_01-28-38
  done: false
  episode_len_mean: 385.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1253
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5502575781610277
          entropy_coeff: 0.009999999999999998
          kl: 0.01027109385673012
          policy_loss: -0.15165345528059535
          total_loss: -0.15922996708088452
          vf_explained_var: -0.4887457489967346
          vf_loss: 0.0001264532218758379
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000
  iterations_since_restore: 459
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,459,11064.7,459000,0,0,0,385.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-10-09_01-28-59
  done: false
  episode_len_mean: 387.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1255
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.798986342218187
          entropy_coeff: 0.009999999999999998
          kl: 0.011647299475155571
          policy_loss: -0.04524052399728033
          total_loss: -0.05428604649172889
          vf_explained_var: -0.9995434284210205
          vf_loss: 9.967100583404923e-05
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
  iterations_since_restore: 460
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,460,11085.4,460000,0,0,0,387.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-10-09_01-29-20
  done: false
  episode_len_mean: 387.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1258
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7132825864685906
          entropy_coeff: 0.009999999999999998
          kl: 0.012418651811867163
          policy_loss: -0.09022803716361523
          total_loss: -0.09781558327376842
          vf_explained_var: -1.0
          vf_loss: 0.00011486844822583306
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000
  iterations_since_restore: 461
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,461,11106.4,461000,0,0,0,387.67




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-10-09_01-29-58
  done: false
  episode_len_mean: 389.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1261
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6701273375087313
          entropy_coeff: 0.009999999999999998
          kl: 0.013649148731184448
          policy_loss: -0.08988034377495448
          total_loss: -0.09602314391069942
          vf_explained_var: -0.43968719244003296
          vf_loss: 0.00019365167875851815
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 462000
  iterations_since_restore: 462
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,462,11145,462000,0,0,0,389.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-10-09_01-30-17
  done: false
  episode_len_mean: 389.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1263
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6382082157664828
          entropy_coeff: 0.009999999999999998
          kl: 0.01362451852782461
          policy_loss: -0.04150908034708765
          total_loss: -0.04744942904346519
          vf_explained_var: -1.0
          vf_loss: 9.561540589654921e-05
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 463000
  iterations_since_restore: 463
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,463,11164,463000,0,0,0,389.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-10-09_01-30-39
  done: false
  episode_len_mean: 391.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1266
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.74645025200314
          entropy_coeff: 0.009999999999999998
          kl: 0.010911134758709688
          policy_loss: -0.08095615715202358
          total_loss: -0.09004064376155535
          vf_explained_var: -0.9270188212394714
          vf_loss: 9.437496878995767e-05
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 464000
  iterations_since_restore: 464
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,464,11185.9,464000,0,0,0,391.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-10-09_01-31-05
  done: false
  episode_len_mean: 391.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1269
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.553646371099684
          entropy_coeff: 0.009999999999999998
          kl: 0.00907330768367626
          policy_loss: -0.10963241739405526
          total_loss: -0.11819276213645935
          vf_explained_var: -0.4596026539802551
          vf_loss: 8.607725426878056e-05
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465000
  iterations_since_restore: 465
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,465,11212.2,465000,0,0,0,391.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-10-09_01-31-26
  done: false
  episode_len_mean: 390.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1271
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5972357776429917
          entropy_coeff: 0.009999999999999998
          kl: 0.010011619017603815
          policy_loss: 0.00246341112587187
          total_loss: -0.0036248248484399585
          vf_explained_var: -0.3891628384590149
          vf_loss: 0.0022815529043631005
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 466000
  iterations_since_restore: 466
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,466,11232.6,466000,0,0,0,390.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-10-09_01-31-49
  done: false
  episode_len_mean: 388.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1274
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.551030577553643
          entropy_coeff: 0.009999999999999998
          kl: 0.018143994811255875
          policy_loss: -0.04517672682801883
          total_loss: -0.0466045711034288
          vf_explained_var: -0.07305410504341125
          vf_loss: 0.0003043661717027943
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467000
  iterations_since_restore: 467
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,467,11255.3,467000,-0.01,0,-1,388.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-10-09_01-32-10
  done: false
  episode_len_mean: 389.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1277
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6784674710697598
          entropy_coeff: 0.009999999999999998
          kl: 0.010821760133701952
          policy_loss: -0.06116153804792298
          total_loss: -0.06949323639273644
          vf_explained_var: -0.8511250615119934
          vf_loss: 0.00023520183070407558
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 468000
  iterations_since_restore: 468
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,468,11277,468000,-0.01,0,-1,389.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-10-09_01-32-32
  done: false
  episode_len_mean: 389.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1280
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.652603366639879
          entropy_coeff: 0.009999999999999998
          kl: 0.010116302411322348
          policy_loss: -0.050187790538701746
          total_loss: -0.05872194812529617
          vf_explained_var: -0.8545001745223999
          vf_loss: 0.00030981083206521967
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000
  iterations_since_restore: 469
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,469,11299,469000,-0.01,0,-1,389.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-10-09_01-32-52
  done: false
  episode_len_mean: 388.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1282
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6499288386768765
          entropy_coeff: 0.009999999999999998
          kl: 0.012497619370324545
          policy_loss: -0.056223497456974456
          total_loss: -0.0629791200781862
          vf_explained_var: -0.4984990060329437
          vf_loss: 0.00025328508434338597
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000
  iterations_since_restore: 470
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,470,11319,470000,-0.01,0,-1,388.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-10-09_01-33-16
  done: false
  episode_len_mean: 385.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1285
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6810951617028977
          entropy_coeff: 0.009999999999999998
          kl: 0.01267606488352663
          policy_loss: -0.09040320170008474
          total_loss: -0.09737952045268483
          vf_explained_var: -0.3809768259525299
          vf_loss: 0.00020874834869270367
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 471000
  iterations_since_restore: 471
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,471,11342.5,471000,-0.01,0,-1,385.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-10-09_01-33-40
  done: false
  episode_len_mean: 384.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1288
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3937655687332153
          entropy_coeff: 0.009999999999999998
          kl: 0.011823053328344576
          policy_loss: -0.08940744520061546
          total_loss: -0.09421905929015742
          vf_explained_var: -0.5112928748130798
          vf_loss: 0.0001479105003656716
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 472000
  iterations_since_restore: 472
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,472,11366.3,472000,-0.01,0,-1,384.13




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-10-09_01-34-20
  done: false
  episode_len_mean: 383.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1291
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.639291889137692
          entropy_coeff: 0.009999999999999998
          kl: 0.011738136374806647
          policy_loss: -0.0663531002899011
          total_loss: -0.07369940703113874
          vf_explained_var: -0.41946178674697876
          vf_loss: 0.00013296329925651662
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000
  iterations_since_restore: 473
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,473,11406.8,473000,-0.01,0,-1,383.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-10-09_01-34-44
  done: false
  episode_len_mean: 379.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1294
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5476936009195117
          entropy_coeff: 0.009999999999999998
          kl: 0.010002262885189397
          policy_loss: -0.09177137253185114
          total_loss: -0.09955004093547662
          vf_explained_var: -1.0
          vf_loss: 0.00010280264610224145
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000
  iterations_since_restore: 474
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,474,11430.1,474000,-0.01,0,-1,379.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-10-09_01-34-59
  done: false
  episode_len_mean: 382.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1296
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4530585606892903
          entropy_coeff: 0.009999999999999998
          kl: 0.01156785461169524
          policy_loss: -0.08486971279813184
          total_loss: -0.09035071324970988
          vf_explained_var: -0.7384229302406311
          vf_loss: 0.0002652453852028379
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  iterations_since_restore: 475
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,475,11445.9,475000,-0.01,0,-1,382.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-10-09_01-35-22
  done: false
  episode_len_mean: 380.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1299
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7568127115567524
          entropy_coeff: 0.009999999999999998
          kl: 0.008879413275665252
          policy_loss: -0.051430048834946424
          total_loss: -0.06217777062621382
          vf_explained_var: -0.652769148349762
          vf_loss: 7.760208868098238e-05
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476000
  iterations_since_restore: 476
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,476,11468.8,476000,-0.01,0,-1,380.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-10-09_01-35-46
  done: false
  episode_len_mean: 379.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1301
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.531813508934445
          entropy_coeff: 0.009999999999999998
          kl: 0.014793666665818172
          policy_loss: -0.088624167525106
          total_loss: -0.09255898056758774
          vf_explained_var: -0.28621038794517517
          vf_loss: 0.00014938027412022672
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 477000
  iterations_since_restore: 477
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,477,11492.1,477000,-0.01,0,-1,379.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-10-09_01-36-11
  done: false
  episode_len_mean: 375.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 4
  episodes_total: 1305
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4010316411654153
          entropy_coeff: 0.009999999999999998
          kl: 0.01281271734620246
          policy_loss: -0.12486479977766672
          total_loss: -0.12903879632552465
          vf_explained_var: -0.32451894879341125
          vf_loss: 0.00010666242099735731
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 478000
  iterations_since_restore: 478
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,478,11518,478000,-0.01,0,-1,375.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-10-09_01-36-34
  done: false
  episode_len_mean: 373.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1307
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.54545882013109
          entropy_coeff: 0.009999999999999998
          kl: 0.016023169026228887
          policy_loss: -0.06552635671363936
          total_loss: -0.06873109431730376
          vf_explained_var: -0.3159128427505493
          vf_loss: 8.225552664953284e-05
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
  iterations_since_restore: 479
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,479,11540.9,479000,-0.01,0,-1,373.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-10-09_01-37-00
  done: false
  episode_len_mean: 370.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 4
  episodes_total: 1311
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.472715719540914
          entropy_coeff: 0.009999999999999998
          kl: 0.011222867512966812
          policy_loss: -0.0866748519655731
          total_loss: -0.09280338883399963
          vf_explained_var: -0.2513158321380615
          vf_loss: 7.625605176144745e-05
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 480000
  iterations_since_restore: 480
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,480,11566.3,480000,-0.01,0,-1,370.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-10-09_01-37-23
  done: false
  episode_len_mean: 370.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1314
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6513430767589146
          entropy_coeff: 0.009999999999999998
          kl: 0.01153828981228865
          policy_loss: -0.10303206551406119
          total_loss: -0.11070914988716443
          vf_explained_var: -0.5466921925544739
          vf_loss: 7.445874774324087e-05
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 481000
  iterations_since_restore: 481
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,481,11589.5,481000,-0.01,0,-1,370.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-10-09_01-37-48
  done: false
  episode_len_mean: 367.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1317
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5753336111704508
          entropy_coeff: 0.009999999999999998
          kl: 0.009912256230947096
          policy_loss: -0.0303153270855546
          total_loss: -0.038478991927372086
          vf_explained_var: -0.20684745907783508
          vf_loss: 6.255296231958912e-05
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 482000
  iterations_since_restore: 482
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,482,11614.2,482000,-0.01,0,-1,367.03




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-10-09_01-38-27
  done: false
  episode_len_mean: 363.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1320
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.563938311735789
          entropy_coeff: 0.009999999999999998
          kl: 0.013535109091499306
          policy_loss: -0.06743902857932779
          total_loss: -0.07274332601163122
          vf_explained_var: -0.16783691942691803
          vf_loss: 5.686158603768692e-05
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 483000
  iterations_since_restore: 483
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,483,11653.2,483000,-0.01,0,-1,363.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-10-09_01-38-48
  done: false
  episode_len_mean: 364.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1322
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6183942662345039
          entropy_coeff: 0.009999999999999998
          kl: 0.012802119577623798
          policy_loss: -0.08207244012090895
          total_loss: -0.08846654713981682
          vf_explained_var: -0.26960861682891846
          vf_loss: 6.822544694134397e-05
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484000
  iterations_since_restore: 484
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,484,11674.7,484000,-0.01,0,-1,364.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-10-09_01-39-10
  done: false
  episode_len_mean: 366.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1325
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6226202885309855
          entropy_coeff: 0.009999999999999998
          kl: 0.012892759775208755
          policy_loss: -0.040023996267053816
          total_loss: -0.04637506115767691
          vf_explained_var: -0.2503391206264496
          vf_loss: 8.469831211388939e-05
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485000
  iterations_since_restore: 485
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,485,11696.5,485000,-0.01,0,-1,366.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-10-09_01-39-33
  done: false
  episode_len_mean: 367.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1327
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5771025617917378
          entropy_coeff: 0.009999999999999998
          kl: 0.011238673292215987
          policy_loss: -0.032169592711660595
          total_loss: -0.03935189901126756
          vf_explained_var: -0.37274694442749023
          vf_loss: 5.4352179318407756e-05
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486000
  iterations_since_restore: 486
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,486,11719.6,486000,-0.01,0,-1,367.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-10-09_01-39-56
  done: false
  episode_len_mean: 366.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1330
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.1318390680683983
          entropy_coeff: 0.009999999999999998
          kl: 0.008229568571617227
          policy_loss: -0.10901535894307825
          total_loss: -0.11313516369296445
          vf_explained_var: -0.33597931265830994
          vf_loss: 0.0009492561033160504
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487000
  iterations_since_restore: 487
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,487,11742.4,487000,-0.01,0,-1,366.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-10-09_01-40-21
  done: false
  episode_len_mean: 363.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1333
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.1985067811277177
          entropy_coeff: 0.009999999999999998
          kl: 0.009947923996048935
          policy_loss: -0.14837189378837745
          total_loss: -0.1527361528740989
          vf_explained_var: -0.27866867184638977
          vf_loss: 6.660374589652444e-05
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  iterations_since_restore: 488
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,488,11767.1,488000,-0.01,0,-1,363.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-10-09_01-40-41
  done: false
  episode_len_mean: 363.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1336
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6003145429823133
          entropy_coeff: 0.009999999999999998
          kl: 0.015807100372138956
          policy_loss: -0.047546964262922604
          total_loss: -0.05147247061961227
          vf_explained_var: -0.9457694888114929
          vf_loss: 7.412136106318535e-05
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000
  iterations_since_restore: 489
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,489,11787.2,489000,-0.01,0,-1,363.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-10-09_01-41-05
  done: false
  episode_len_mean: 361.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1339
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5299314936002095
          entropy_coeff: 0.009999999999999998
          kl: 0.01558018608675755
          policy_loss: -0.015513771006630527
          total_loss: -0.018917401600629092
          vf_explained_var: -0.5403381586074829
          vf_loss: 6.448204836083783e-05
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  iterations_since_restore: 490
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,490,11811.7,490000,-0.01,0,-1,361.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-10-09_01-41-29
  done: false
  episode_len_mean: 360.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1342
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.457689462767707
          entropy_coeff: 0.009999999999999998
          kl: 0.010364496154005564
          policy_loss: -0.030491063474780984
          total_loss: -0.03710530870076683
          vf_explained_var: 0.12000738829374313
          vf_loss: 9.210996407394608e-05
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  iterations_since_restore: 491
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,491,11835.2,491000,-0.01,0,-1,360.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-10-09_01-41-51
  done: false
  episode_len_mean: 360.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1344
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3352429575390286
          entropy_coeff: 0.009999999999999998
          kl: 0.014731188198458749
          policy_loss: -0.09551454550690121
          total_loss: -0.09751048998700249
          vf_explained_var: 0.02184152603149414
          vf_loss: 0.00016998996854656272
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  iterations_since_restore: 492
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,492,11857.5,492000,-0.01,0,-1,360.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-10-09_01-42-10
  done: false
  episode_len_mean: 359.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1346
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6481588350401983
          entropy_coeff: 0.009999999999999998
          kl: 0.013284488661369744
          policy_loss: -0.09674452828864256
          total_loss: -0.1029523187627395
          vf_explained_var: -0.23142296075820923
          vf_loss: 0.0001858865273081594
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000
  iterations_since_restore: 493
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,493,11876.2,493000,-0.01,0,-1,359.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-10-09_01-42-33
  done: false
  episode_len_mean: 359.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1349
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.584521594312456
          entropy_coeff: 0.009999999999999998
          kl: 0.011482862831374282
          policy_loss: -0.06444529650939836
          total_loss: -0.07149629356960456
          vf_explained_var: -0.531182050704956
          vf_loss: 7.442154362959425e-05
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000
  iterations_since_restore: 494
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,494,11899.1,494000,-0.01,0,-1,359.92




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-10-09_01-43-14
  done: false
  episode_len_mean: 360.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1352
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8882259448369345
          entropy_coeff: 0.009999999999999998
          kl: 0.011905151706253983
          policy_loss: -0.07397758312937286
          total_loss: -0.08374416774345769
          vf_explained_var: -1.0
          vf_loss: 7.520310775564415e-05
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 495000
  iterations_since_restore: 495
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,495,11939.8,495000,-0.01,0,-1,360.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-10-09_01-43-36
  done: false
  episode_len_mean: 360.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1355
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6205295986599393
          entropy_coeff: 0.009999999999999998
          kl: 0.008866575127903085
          policy_loss: -0.11397646011577713
          total_loss: -0.12335949745029211
          vf_explained_var: -0.8576387166976929
          vf_loss: 8.920193544731268e-05
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 496000
  iterations_since_restore: 496
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,496,11962.6,496000,-0.01,0,-1,360.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-10-09_01-44-02
  done: false
  episode_len_mean: 359.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1358
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.524423619111379
          entropy_coeff: 0.009999999999999998
          kl: 0.009669638800826401
          policy_loss: -0.09181736649738417
          total_loss: -0.0996610374086433
          vf_explained_var: -0.9775478839874268
          vf_loss: 5.768459422041714e-05
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000
  iterations_since_restore: 497
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,497,11987.7,497000,-0.01,0,-1,359.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-10-09_01-44-25
  done: false
  episode_len_mean: 358.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1361
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4473023295402527
          entropy_coeff: 0.009999999999999998
          kl: 0.011188084035728312
          policy_loss: -0.05823977179825306
          total_loss: -0.06412122121287717
          vf_explained_var: -0.6016881465911865
          vf_loss: 9.562385097928604e-05
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 498000
  iterations_since_restore: 498
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,498,12010.9,498000,-0.01,0,-1,358.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-10-09_01-44-48
  done: false
  episode_len_mean: 357.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1364
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5528779639138115
          entropy_coeff: 0.009999999999999998
          kl: 0.01188434860456255
          policy_loss: -0.10286198165267706
          total_loss: -0.1093050256371498
          vf_explained_var: -0.8750485777854919
          vf_loss: 6.105958632866128e-05
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
  iterations_since_restore: 499
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,499,12033.9,499000,-0.01,0,-1,357.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-10-09_01-45-12
  done: false
  episode_len_mean: 356.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1367
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7499024894502428
          entropy_coeff: 0.009999999999999998
          kl: 0.0117685050052548
          policy_loss: -0.056345478941996895
          total_loss: -0.06484086782568031
          vf_explained_var: -1.0
          vf_loss: 6.692815978668579e-05
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500000
  iterations_since_restore: 500
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,500,12058,500000,-0.01,0,-1,356.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-10-09_01-45-35
  done: false
  episode_len_mean: 358.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1369
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.701946368482378
          entropy_coeff: 0.009999999999999998
          kl: 0.013063347448156352
          policy_loss: -0.04108893266982502
          total_loss: -0.04814110973642932
          vf_explained_var: -0.6328403949737549
          vf_loss: 4.730905157986045e-05
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 501000
  iterations_since_restore: 501
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,501,12081.1,501000,-0.01,0,-1,358.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-10-09_01-46-00
  done: false
  episode_len_mean: 356.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1372
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6139410217603047
          entropy_coeff: 0.009999999999999998
          kl: 0.011559613543710611
          policy_loss: -0.02247989842047294
          total_loss: -0.029793639067146514
          vf_explained_var: -1.0
          vf_loss: 4.758876930281986e-05
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000
  iterations_since_restore: 502
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,502,12105.7,502000,0,0,0,356.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-10-09_01-46-19
  done: false
  episode_len_mean: 358.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1375
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4504527568817138
          entropy_coeff: 0.009999999999999998
          kl: 0.014716461286934882
          policy_loss: -0.0724834193682505
          total_loss: -0.07571494793519377
          vf_explained_var: -0.17565685510635376
          vf_loss: 9.76874457086928e-05
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503000
  iterations_since_restore: 503
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,503,12125.1,503000,0,0,0,358.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-10-09_01-46-46
  done: false
  episode_len_mean: 356.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1378
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.670601134830051
          entropy_coeff: 0.009999999999999998
          kl: 0.012892261961126689
          policy_loss: -0.05652306011567513
          total_loss: -0.06338974322295851
          vf_explained_var: -0.1309634894132614
          vf_loss: 4.9265068001760585e-05
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504000
  iterations_since_restore: 504
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,504,12151.6,504000,0,0,0,356.22




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-10-09_01-47-26
  done: false
  episode_len_mean: 355.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1381
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5526411652565002
          entropy_coeff: 0.009999999999999998
          kl: 0.011205968522802692
          policy_loss: -0.0490610384899709
          total_loss: -0.05602990958011812
          vf_explained_var: -1.0
          vf_loss: 4.8007931622527796e-05
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000
  iterations_since_restore: 505
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,505,12192.2,505000,0,0,0,355.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-10-09_01-47-49
  done: false
  episode_len_mean: 355.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1383
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6621218336953056
          entropy_coeff: 0.009999999999999998
          kl: 0.01119139675017582
          policy_loss: -0.05729262077042626
          total_loss: -0.0653428700985387
          vf_explained_var: -0.1686820387840271
          vf_loss: 7.250686455032944e-05
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000
  iterations_since_restore: 506
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,506,12215.2,506000,0,0,0,355.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-10-09_01-48-12
  done: false
  episode_len_mean: 355.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1386
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7542192061742148
          entropy_coeff: 0.009999999999999998
          kl: 0.010282529277790785
          policy_loss: -0.025656603628562556
          total_loss: -0.0353529694593615
          vf_explained_var: -0.9679151773452759
          vf_loss: 3.753105138457613e-05
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000
  iterations_since_restore: 507
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,507,12237.6,507000,0,0,0,355.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-10-09_01-48-36
  done: false
  episode_len_mean: 356.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1389
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7338908116022747
          entropy_coeff: 0.009999999999999998
          kl: 0.013419492137876275
          policy_loss: -0.1131365720803539
          total_loss: -0.12024136955539386
          vf_explained_var: -0.41598984599113464
          vf_loss: 4.3681751736181066e-05
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000
  iterations_since_restore: 508
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,508,12261.8,508000,0,0,0,356.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-10-09_01-49-00
  done: false
  episode_len_mean: 356.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1392
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.83587593237559
          entropy_coeff: 0.009999999999999998
          kl: 0.010479838705990676
          policy_loss: -0.0906600219094091
          total_loss: -0.10102662882871098
          vf_explained_var: -0.5317685008049011
          vf_loss: 3.402730357669902e-05
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000
  iterations_since_restore: 509
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,509,12285.9,509000,0,0,0,356.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-10-09_01-49-26
  done: false
  episode_len_mean: 355.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1395
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6816505352656046
          entropy_coeff: 0.009999999999999998
          kl: 0.010673729863532858
          policy_loss: -0.04960759071012338
          total_loss: -0.05828808041082488
          vf_explained_var: -0.5890124440193176
          vf_loss: 3.065286883712462e-05
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000
  iterations_since_restore: 510
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,510,12311.7,510000,0,0,0,355.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-10-09_01-49-49
  done: false
  episode_len_mean: 351.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1398
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6912383317947388
          entropy_coeff: 0.009999999999999998
          kl: 0.010399319696545846
          policy_loss: -0.04895837778846423
          total_loss: -0.05794606092903349
          vf_explained_var: -0.7531994581222534
          vf_loss: 2.7715974566591386e-05
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511000
  iterations_since_restore: 511
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,511,12334.8,511000,0,0,0,351.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-10-09_01-50-13
  done: false
  episode_len_mean: 351.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1401
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6208749837345546
          entropy_coeff: 0.009999999999999998
          kl: 0.005253666466839648
          policy_loss: 0.004428325759039985
          total_loss: 0.008522257539961072
          vf_explained_var: -0.2639956772327423
          vf_loss: 0.016313176413273647
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
  iterations_since_restore: 512
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,512,12358.8,512000,-0.02,0,-2,351.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-10-09_01-50-38
  done: false
  episode_len_mean: 351.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1404
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9342079440752664
          entropy_coeff: 0.009999999999999998
          kl: 0.012995666747845549
          policy_loss: -0.1260538772576385
          total_loss: -0.13374265519281228
          vf_explained_var: 0.3391932249069214
          vf_loss: 0.0017847136620225178
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000
  iterations_since_restore: 513
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,513,12384.1,513000,-0.02,0,-2,351.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-10-09_01-51-02
  done: false
  episode_len_mean: 352.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1407
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.84625778860516
          entropy_coeff: 0.009999999999999998
          kl: 0.011257162334262175
          policy_loss: -0.04309538311014573
          total_loss: -0.051489828195836806
          vf_explained_var: -0.21533611416816711
          vf_loss: 0.0015197280981940114
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 514000
  iterations_since_restore: 514
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,514,12407.3,514000,-0.02,0,-2,352.05




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-10-09_01-51-44
  done: false
  episode_len_mean: 351.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1410
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4772862811883292
          entropy_coeff: 0.009999999999999998
          kl: 0.011792832296987143
          policy_loss: -0.10128103631238143
          total_loss: -0.10507675487962034
          vf_explained_var: 0.2008536159992218
          vf_loss: 0.0020219676652535176
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515000
  iterations_since_restore: 515
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,515,12449.3,515000,-0.02,0,-2,351.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-10-09_01-52-08
  done: false
  episode_len_mean: 352.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1413
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8045495099491544
          entropy_coeff: 0.009999999999999998
          kl: 0.010703951895615245
          policy_loss: -0.12626923695206643
          total_loss: -0.1355537085586952
          vf_explained_var: 0.023100070655345917
          vf_loss: 0.0006327128682945234
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 516000
  iterations_since_restore: 516
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,516,12473.9,516000,-0.02,0,-2,352.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-10-09_01-52-28
  done: false
  episode_len_mean: 353.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1415
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4491006440586514
          entropy_coeff: 0.009999999999999998
          kl: 0.016459454460706978
          policy_loss: -0.029509567634926903
          total_loss: -0.030985107604000302
          vf_explained_var: 0.1867828220129013
          vf_loss: 0.0005165699773189974
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000
  iterations_since_restore: 517
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,517,12494,517000,-0.02,0,-2,353.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-10-09_01-52-48
  done: false
  episode_len_mean: 356.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1418
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.529207718372345
          entropy_coeff: 0.009999999999999998
          kl: 0.011798550939997179
          policy_loss: -0.12464568043748538
          total_loss: -0.13043165107568105
          vf_explained_var: -0.024915160611271858
          vf_loss: 0.0005465840642702662
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000
  iterations_since_restore: 518
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,518,12513.8,518000,-0.02,0,-2,356.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-10-09_01-53-15
  done: false
  episode_len_mean: 354.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1421
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.49331517484453
          entropy_coeff: 0.009999999999999998
          kl: 0.011965986369984015
          policy_loss: -0.050863323857386906
          total_loss: -0.056400227463907666
          vf_explained_var: -0.8183623552322388
          vf_loss: 0.0003095770421269764
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000
  iterations_since_restore: 519
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,519,12540.7,519000,-0.02,0,-2,354.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-10-09_01-53-41
  done: false
  episode_len_mean: 352.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1424
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.607846286561754
          entropy_coeff: 0.009999999999999998
          kl: 0.011740112071452952
          policy_loss: -0.028950963479777176
          total_loss: -0.03595684112774001
          vf_explained_var: -0.783704400062561
          vf_loss: 0.0001574389969593742
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000
  iterations_since_restore: 520
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,520,12566.7,520000,-0.02,0,-2,352.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-10-09_01-54-05
  done: false
  episode_len_mean: 352.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1427
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.514551705784268
          entropy_coeff: 0.009999999999999998
          kl: 0.010230649657341178
          policy_loss: -0.06049804402929213
          total_loss: -0.0677035875721938
          vf_explained_var: -0.9429975748062134
          vf_loss: 0.00017107510171222707
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 521000
  iterations_since_restore: 521
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,521,12590.6,521000,-0.02,0,-2,352.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-10-09_01-54-32
  done: false
  episode_len_mean: 350.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1430
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.631759840912289
          entropy_coeff: 0.009999999999999998
          kl: 0.01411628678541348
          policy_loss: -0.06140938881370756
          total_loss: -0.06689368736826712
          vf_explained_var: -0.4826204180717468
          vf_loss: 0.00011374363635291552
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000
  iterations_since_restore: 522
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,522,12617.7,522000,-0.02,0,-2,350.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-10-09_01-54-55
  done: false
  episode_len_mean: 351.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1433
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6790735999743143
          entropy_coeff: 0.009999999999999998
          kl: 0.0063741653343734744
          policy_loss: -0.030014865038295587
          total_loss: -0.04155075408311354
          vf_explained_var: -0.3408762812614441
          vf_loss: 0.00041446623427974475
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 523000
  iterations_since_restore: 523
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,523,12641.1,523000,-0.02,0,-2,351.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-10-09_01-55-21
  done: false
  episode_len_mean: 349.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1436
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5753128237194485
          entropy_coeff: 0.009999999999999998
          kl: 0.010621119629237391
          policy_loss: -0.05885400345755948
          total_loss: -0.066448298531274
          vf_explained_var: -0.42642080783843994
          vf_loss: 9.342170976904325e-05
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524000
  iterations_since_restore: 524
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,524,12666.4,524000,-0.02,0,-2,349.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-10-09_01-55-47
  done: false
  episode_len_mean: 348.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1439
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6165418929523891
          entropy_coeff: 0.009999999999999998
          kl: 0.010967444335186361
          policy_loss: -0.04196239295932982
          total_loss: -0.0497263519714276
          vf_explained_var: -0.9159966111183167
          vf_loss: 7.305587957186314e-05
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
  iterations_since_restore: 525
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,525,12692.4,525000,-0.02,0,-2,348.78




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-10-09_01-56-27
  done: false
  episode_len_mean: 348.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1442
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5899459958076476
          entropy_coeff: 0.009999999999999998
          kl: 0.014496334080496996
          policy_loss: -0.09145626011821958
          total_loss: -0.09621835446192159
          vf_explained_var: -0.6622053980827332
          vf_loss: 0.00012921698370418097
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000
  iterations_since_restore: 526
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,526,12733,526000,-0.02,0,-2,348.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-10-09_01-56-43
  done: false
  episode_len_mean: 351.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1444
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.2051785316732195
          entropy_coeff: 0.009999999999999998
          kl: 0.011467420317212883
          policy_loss: -0.016850643708474108
          total_loss: -0.01990974046703842
          vf_explained_var: -0.4412594139575958
          vf_loss: 0.00028461767150373714
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000
  iterations_since_restore: 527
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,527,12748.5,527000,-0.02,0,-2,351.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-10-09_01-57-01
  done: false
  episode_len_mean: 353.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1446
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5023659394847022
          entropy_coeff: 0.009999999999999998
          kl: 0.013283109435662549
          policy_loss: -0.06931662704381678
          total_loss: -0.0740873755266269
          vf_explained_var: -0.3478502631187439
          vf_loss: 0.0001660518081431898
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000
  iterations_since_restore: 528
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,528,12766.3,528000,-0.02,0,-2,353.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-10-09_01-57-21
  done: false
  episode_len_mean: 354.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1448
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3367702033784654
          entropy_coeff: 0.009999999999999998
          kl: 0.010040838546705287
          policy_loss: -0.08745865087128347
          total_loss: -0.09308324154052469
          vf_explained_var: -0.4853944480419159
          vf_loss: 0.00011834816784054662
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 529000
  iterations_since_restore: 529
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,529,12786.1,529000,-0.02,0,-2,354.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-10-09_01-57-45
  done: false
  episode_len_mean: 354.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1451
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4804737832811143
          entropy_coeff: 0.009999999999999998
          kl: 0.011580228599836337
          policy_loss: -0.03258751001622942
          total_loss: -0.03851750700010194
          vf_explained_var: -0.4338787794113159
          vf_loss: 8.100447448669001e-05
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000
  iterations_since_restore: 530
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,530,12810.9,530000,-0.02,0,-2,354.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-10-09_01-58-09
  done: false
  episode_len_mean: 353.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1454
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.719048023223877
          entropy_coeff: 0.009999999999999998
          kl: 0.00999415503953808
          policy_loss: -0.1081568741136127
          total_loss: -0.11767280023131106
          vf_explained_var: -0.99937903881073
          vf_loss: 8.523952957249195e-05
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000
  iterations_since_restore: 531
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,531,12834.6,531000,-0.02,0,-2,353.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-10-09_01-58-30
  done: false
  episode_len_mean: 355.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1456
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4882250361972384
          entropy_coeff: 0.009999999999999998
          kl: 0.014221176375547939
          policy_loss: -0.09891173024144438
          total_loss: -0.10276633724570275
          vf_explained_var: -0.6121246218681335
          vf_loss: 0.00022843967659961587
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
  iterations_since_restore: 532
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,532,12855.3,532000,-0.02,0,-2,355.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-10-09_01-58-56
  done: false
  episode_len_mean: 354.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1459
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7009000831180148
          entropy_coeff: 0.009999999999999998
          kl: 0.016036976912970128
          policy_loss: -0.09461080634759532
          total_loss: -0.09937967442803913
          vf_explained_var: -0.25230076909065247
          vf_loss: 6.20552968030097e-05
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
  iterations_since_restore: 533
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,533,12881.5,533000,-0.02,0,-2,354.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-10-09_01-59-19
  done: false
  episode_len_mean: 356.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1462
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.554412583510081
          entropy_coeff: 0.009999999999999998
          kl: 0.012533852713643769
          policy_loss: -0.08236943057013882
          total_loss: -0.08832843489944935
          vf_explained_var: -0.4435076117515564
          vf_loss: 6.722727133213387e-05
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  iterations_since_restore: 534
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,534,12904.2,534000,-0.02,0,-2,356.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-10-09_01-59-40
  done: false
  episode_len_mean: 357.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1465
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4642578032281663
          entropy_coeff: 0.009999999999999998
          kl: 0.012926258665946952
          policy_loss: -0.10346640449845129
          total_loss: -0.10824069080667363
          vf_explained_var: -0.9135204553604126
          vf_loss: 5.241134868912114e-05
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  iterations_since_restore: 535
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,535,12925.6,535000,-0.02,0,-2,357.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-10-09_02-00-05
  done: false
  episode_len_mean: 356.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1467
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.58309336370892
          entropy_coeff: 0.009999999999999998
          kl: 0.010755744365832499
          policy_loss: -0.07877032132301894
          total_loss: -0.08639488975620932
          vf_explained_var: -0.8419251441955566
          vf_loss: 3.872067304150227e-05
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000
  iterations_since_restore: 536
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,536,12950.6,536000,-0.02,0,-2,356.43




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-10-09_02-00-49
  done: false
  episode_len_mean: 355.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 4
  episodes_total: 1471
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3760669681761
          entropy_coeff: 0.009999999999999998
          kl: 0.019071212558192406
          policy_loss: -0.03576929825875494
          total_loss: -0.034992933107746975
          vf_explained_var: 0.0684710294008255
          vf_loss: 5.4830742940086765e-05
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 537000
  iterations_since_restore: 537
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,537,12994.3,537000,-0.02,0,-2,355.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-10-09_02-01-16
  done: false
  episode_len_mean: 353.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1474
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4270953019460042
          entropy_coeff: 0.009999999999999998
          kl: 0.011291157804015168
          policy_loss: -0.0586460932261414
          total_loss: -0.06431005174914996
          vf_explained_var: -0.14553210139274597
          vf_loss: 3.27702584804178e-05
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 538000
  iterations_since_restore: 538
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,538,13021.7,538000,-0.02,0,-2,353.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-10-09_02-01-42
  done: false
  episode_len_mean: 353.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1477
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4413462387190925
          entropy_coeff: 0.009999999999999998
          kl: 0.010304752474684343
          policy_loss: -0.08269635799030463
          total_loss: -0.08924347004956669
          vf_explained_var: -0.9480804204940796
          vf_loss: 4.117776311937228e-05
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 539000
  iterations_since_restore: 539
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,539,13047,539000,-0.02,0,-2,353.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-10-09_02-02-03
  done: false
  episode_len_mean: 352.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1479
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4287496474054124
          entropy_coeff: 0.009999999999999998
          kl: 0.012859038350932097
          policy_loss: -0.06667897476711207
          total_loss: -0.071125063393265
          vf_explained_var: -0.504307746887207
          vf_loss: 7.657267904303606e-05
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 540000
  iterations_since_restore: 540
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,540,13068.6,540000,-0.02,0,-2,352.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-10-09_02-02-25
  done: false
  episode_len_mean: 356.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1482
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5203412188424004
          entropy_coeff: 0.009999999999999998
          kl: 0.015271296148615082
          policy_loss: -0.10012074851741394
          total_loss: -0.103679175923268
          vf_explained_var: -0.5836342573165894
          vf_loss: 4.834701667277841e-05
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541000
  iterations_since_restore: 541
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,541,13090.5,541000,-0.02,0,-2,356.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-10-09_02-02-51
  done: false
  episode_len_mean: 355.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1485
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.381309688091278
          entropy_coeff: 0.009999999999999998
          kl: 0.010790087968647496
          policy_loss: -0.03288728296756745
          total_loss: -0.03847459347711669
          vf_explained_var: -0.11915059387683868
          vf_loss: 3.20602265573042e-05
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000
  iterations_since_restore: 542
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,542,13115.8,542000,-0.02,0,-2,355.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-10-09_02-03-10
  done: false
  episode_len_mean: 357.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1487
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4009823474619123
          entropy_coeff: 0.009999999999999998
          kl: 0.016694902063508286
          policy_loss: -0.06629390836589866
          total_loss: -0.06756720563603771
          vf_explained_var: -0.25908637046813965
          vf_loss: 5.88346344253902e-05
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 543000
  iterations_since_restore: 543
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,543,13135.2,543000,-0.02,0,-2,357.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-10-09_02-03-35
  done: false
  episode_len_mean: 357.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1490
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3195371945699057
          entropy_coeff: 0.009999999999999998
          kl: 0.012898399212240482
          policy_loss: -0.06676267840796046
          total_loss: -0.07013578514258066
          vf_explained_var: -0.7117365598678589
          vf_loss: 2.7543832705608413e-05
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000
  iterations_since_restore: 544
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,544,13160.6,544000,-0.02,0,-2,357.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-10-09_02-03-59
  done: false
  episode_len_mean: 358.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1493
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3607687579260932
          entropy_coeff: 0.009999999999999998
          kl: 0.01458854909776848
          policy_loss: -0.08076102220349841
          total_loss: -0.08324826914403173
          vf_explained_var: -0.14009293913841248
          vf_loss: 4.226167319656169e-05
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000
  iterations_since_restore: 545
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,545,13184.3,545000,-0.02,0,-2,358.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-10-09_02-04-20
  done: false
  episode_len_mean: 361.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1495
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.032630470726225
          entropy_coeff: 0.009999999999999998
          kl: 0.008434102192568262
          policy_loss: -0.047731202499320106
          total_loss: -0.05139721374337872
          vf_explained_var: -0.8006742000579834
          vf_loss: 0.000255647837356163
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 546000
  iterations_since_restore: 546
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,546,13204.8,546000,-0.02,0,-2,361.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-10-09_02-04-43
  done: false
  episode_len_mean: 361.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1497
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4236684560775756
          entropy_coeff: 0.009999999999999998
          kl: 0.012416007922345863
          policy_loss: -0.12569762302769555
          total_loss: -0.13047472453779643
          vf_explained_var: -0.5880314111709595
          vf_loss: 3.117826195698904e-05
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000
  iterations_since_restore: 547
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,547,13227.7,547000,-0.02,0,-2,361.13




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-10-09_02-05-22
  done: false
  episode_len_mean: 361.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1500
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.304884299967024
          entropy_coeff: 0.009999999999999998
          kl: 0.012778773318517424
          policy_loss: -0.038433499013384186
          total_loss: -0.04174748319718573
          vf_explained_var: -0.7436173558235168
          vf_loss: 3.097943584483194e-05
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 548000
  iterations_since_restore: 548
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,548,13266.8,548000,-0.02,0,-2,361.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-10-09_02-05-47
  done: false
  episode_len_mean: 363.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1503
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4640905883577136
          entropy_coeff: 0.009999999999999998
          kl: 0.010891141503170745
          policy_loss: -0.1000942774116993
          total_loss: -0.10643691946234968
          vf_explained_var: -0.6328009963035583
          vf_loss: 2.7803103694168386e-05
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 549000
  iterations_since_restore: 549
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,549,13291.6,549000,0,0,0,363.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-10-09_02-06-17
  done: false
  episode_len_mean: 361.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1507
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.2781717578570049
          entropy_coeff: 0.009999999999999998
          kl: 0.010031482226510648
          policy_loss: -0.04020100434621175
          total_loss: -0.045321957187520134
          vf_explained_var: -0.0949787124991417
          vf_loss: 4.310729003312493e-05
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 550000
  iterations_since_restore: 550
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,550,13321.7,550000,0,0,0,361.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-10-09_02-06-37
  done: false
  episode_len_mean: 363.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1509
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.2894412179787953
          entropy_coeff: 0.009999999999999998
          kl: 0.010516839760349386
          policy_loss: -0.04151376295420858
          total_loss: -0.04639662773244911
          vf_explained_var: -0.4222528338432312
          vf_loss: 2.5324126777882663e-05
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 551000
  iterations_since_restore: 551
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,551,13342.1,551000,0,0,0,363.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-10-09_02-06-55
  done: false
  episode_len_mean: 366.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1511
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.305651612414254
          entropy_coeff: 0.009999999999999998
          kl: 0.012444421194218415
          policy_loss: -0.00302617781692081
          total_loss: -0.0065879285335540775
          vf_explained_var: -0.6956638097763062
          vf_loss: 4.4783765204354295e-05
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552000
  iterations_since_restore: 552
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,552,13360.2,552000,0,0,0,366.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-10-09_02-07-18
  done: false
  episode_len_mean: 366.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1514
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.472162902355194
          entropy_coeff: 0.009999999999999998
          kl: 0.01063804713669659
          policy_loss: -0.05229244509504901
          total_loss: -0.05889812712040213
          vf_explained_var: -0.49633607268333435
          vf_loss: 3.768170054172515e-05
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553000
  iterations_since_restore: 553
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,553,13383.2,553000,0,0,0,366.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-10-09_02-07-44
  done: false
  episode_len_mean: 364.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1517
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.536816868517134
          entropy_coeff: 0.009999999999999998
          kl: 0.012146203320291172
          policy_loss: -0.10073058356841405
          total_loss: -0.10682283838589986
          vf_explained_var: 0.09907054156064987
          vf_loss: 5.2391513731385605e-05
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000
  iterations_since_restore: 554
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,554,13409,554000,0,0,0,364.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-10-09_02-08-03
  done: false
  episode_len_mean: 364.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1519
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5941193872027928
          entropy_coeff: 0.009999999999999998
          kl: 0.011868632838551541
          policy_loss: -0.025340743756128683
          total_loss: -0.032235761814647254
          vf_explained_var: -0.6539059281349182
          vf_loss: 3.343426285735202e-05
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 555000
  iterations_since_restore: 555
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,555,13427.9,555000,0,0,0,364.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-10-09_02-08-22
  done: false
  episode_len_mean: 366.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1521
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4266328268580966
          entropy_coeff: 0.009999999999999998
          kl: 0.012877872769055543
          policy_loss: -0.09726837852762805
          total_loss: -0.1017287910812431
          vf_explained_var: -0.6923947930335999
          vf_loss: 2.6785013172532975e-05
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 556000
  iterations_since_restore: 556
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,556,13446.9,556000,0,0,0,366.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-10-09_02-08-46
  done: false
  episode_len_mean: 368.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1524
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5434849381446838
          entropy_coeff: 0.009999999999999998
          kl: 0.011721327592021503
          policy_loss: -0.06544867989917597
          total_loss: -0.07196023112369908
          vf_explained_var: -0.38470572233200073
          vf_loss: 2.2415229553492586e-05
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000
  iterations_since_restore: 557
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,557,13470.8,557000,0,0,0,368.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-10-09_02-09-06
  done: false
  episode_len_mean: 370.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1527
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3144101752175226
          entropy_coeff: 0.009999999999999998
          kl: 0.009714130793013748
          policy_loss: -0.07449691775772306
          total_loss: -0.0802460795475377
          vf_explained_var: 0.15525314211845398
          vf_loss: 1.8269751677103662e-05
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 558000
  iterations_since_restore: 558
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,558,13491.1,558000,0,0,0,370.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-10-09_02-09-21
  done: false
  episode_len_mean: 371.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 1528
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.375136564175288
          entropy_coeff: 0.009999999999999998
          kl: 0.010262612179187247
          policy_loss: 0.040487892097897
          total_loss: 0.03458349984139204
          vf_explained_var: -0.17864908277988434
          vf_loss: 5.3804382398286384e-05
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 559000
  iterations_since_restore: 559
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,559,13506.1,559000,0,0,0,371.76




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-10-09_02-10-02
  done: false
  episode_len_mean: 374.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1531
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.505987694528368
          entropy_coeff: 0.009999999999999998
          kl: 0.013225863047212667
          policy_loss: -0.11486798454489973
          total_loss: -0.11986281387507916
          vf_explained_var: -0.45547276735305786
          vf_loss: 2.166004100622053e-05
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 560000
  iterations_since_restore: 560
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,560,13546.7,560000,0,0,0,374.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-10-09_02-10-21
  done: false
  episode_len_mean: 378.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1533
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.1438997156090207
          entropy_coeff: 0.009999999999999998
          kl: 0.01217517384905891
          policy_loss: -0.11594647711349858
          total_loss: -0.1181243908074167
          vf_explained_var: 0.40109142661094666
          vf_loss: 1.555890168371358e-05
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 561000
  iterations_since_restore: 561
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,561,13565.7,561000,0,0,0,378.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-10-09_02-10-39
  done: false
  episode_len_mean: 381.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1536
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7360748714870877
          entropy_coeff: 0.009999999999999998
          kl: 0.012834119154007478
          policy_loss: -0.11276438931624094
          total_loss: -0.12034809895687633
          vf_explained_var: -0.38102200627326965
          vf_loss: 3.1131160337988855e-05
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562000
  iterations_since_restore: 562
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,562,13583.4,562000,0,0,0,381.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-10-09_02-10-59
  done: false
  episode_len_mean: 383.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1538
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5373152322239345
          entropy_coeff: 0.009999999999999998
          kl: 0.015398680037238046
          policy_loss: -0.06835144696136315
          total_loss: -0.07199780703004864
          vf_explained_var: 0.16561144590377808
          vf_loss: 3.3419848356667596e-05
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563000
  iterations_since_restore: 563
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,563,13604.2,563000,0,0,0,383.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-10-09_02-11-24
  done: false
  episode_len_mean: 383.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1541
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3595508919821846
          entropy_coeff: 0.009999999999999998
          kl: 0.015003633124887965
          policy_loss: -0.031136076255804963
          total_loss: -0.03302798109749953
          vf_explained_var: -0.02118818461894989
          vf_loss: 0.00031022275832381436
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 564000
  iterations_since_restore: 564
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,564,13629.2,564000,0,0,0,383.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-10-09_02-11-49
  done: false
  episode_len_mean: 377.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1544
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5145775039990743
          entropy_coeff: 0.009999999999999998
          kl: 0.017949789014614914
          policy_loss: -0.07085059032671981
          total_loss: -0.07230692737632327
          vf_explained_var: 0.32487788796424866
          vf_loss: 5.881654692024717e-05
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 565000
  iterations_since_restore: 565
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,565,13654,565000,0,0,0,377.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-10-09_02-12-07
  done: false
  episode_len_mean: 377.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1546
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4307945834265814
          entropy_coeff: 0.009999999999999998
          kl: 0.012664658047516545
          policy_loss: -0.09596435851934883
          total_loss: -0.10062976443312234
          vf_explained_var: 0.037402063608169556
          vf_loss: 2.53144314532013e-05
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 566000
  iterations_since_restore: 566
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,566,13672,566000,0,0,0,377.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-10-09_02-12-31
  done: false
  episode_len_mean: 375.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1549
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5236395253075494
          entropy_coeff: 0.009999999999999998
          kl: 0.013812631445947134
          policy_loss: -0.02842427788095342
          total_loss: -0.03315115382687913
          vf_explained_var: 0.14395053684711456
          vf_loss: 2.0553757596846684e-05
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567000
  iterations_since_restore: 567
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,567,13695.9,567000,0,0,0,375.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-10-09_02-12-54
  done: false
  episode_len_mean: 376.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1552
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.661959441502889
          entropy_coeff: 0.009999999999999998
          kl: 0.011932193674967678
          policy_loss: -0.06059947709242503
          total_loss: -0.06814208063814375
          vf_explained_var: -0.19312453269958496
          vf_loss: 1.598127064931355e-05
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 568000
  iterations_since_restore: 568
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,568,13719,568000,0,0,0,376.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-10-09_02-13-15
  done: false
  episode_len_mean: 378.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1554
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6014206422699822
          entropy_coeff: 0.009999999999999998
          kl: 0.01232463809027198
          policy_loss: -0.10440855088333288
          total_loss: -0.1110358646346463
          vf_explained_var: -0.5024925470352173
          vf_loss: 2.7872976018746462e-05
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000
  iterations_since_restore: 569
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,569,13739.8,569000,0,0,0,378.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-10-09_02-13-43
  done: false
  episode_len_mean: 374.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1558
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.555326180987888
          entropy_coeff: 0.009999999999999998
          kl: 0.011774060593825287
          policy_loss: -0.06681721089407802
          total_loss: -0.07341458838846948
          vf_explained_var: 0.033731527626514435
          vf_loss: 1.4957457713333295e-05
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570000
  iterations_since_restore: 570
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,570,13767.3,570000,0,0,0,374.96




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-10-09_02-14-17
  done: false
  episode_len_mean: 377.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1560
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4291928198602464
          entropy_coeff: 0.009999999999999998
          kl: 0.013950642184000194
          policy_loss: -0.007518828122152222
          total_loss: -0.01120320964190695
          vf_explained_var: -0.36856603622436523
          vf_loss: 1.377940197421089e-05
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000
  iterations_since_restore: 571
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,571,13801.8,571000,0,0,0,377.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-10-09_02-14-34
  done: false
  episode_len_mean: 381.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1562
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.376937164200677
          entropy_coeff: 0.009999999999999998
          kl: 0.011551724483469725
          policy_loss: -0.06268898877832625
          total_loss: -0.06767132004929913
          vf_explained_var: -0.7221410274505615
          vf_loss: 1.4951853823832708e-05
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 572000
  iterations_since_restore: 572
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,572,13818.2,572000,0,0,0,381.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-10-09_02-14-54
  done: false
  episode_len_mean: 380.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1564
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5915309680832757
          entropy_coeff: 0.009999999999999998
          kl: 0.013084091850690132
          policy_loss: -0.06351260737412506
          total_loss: -0.06947059838308228
          vf_explained_var: -0.9789525270462036
          vf_loss: 2.158840756641843e-05
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 573000
  iterations_since_restore: 573
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,573,13838.2,573000,0,0,0,380.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-10-09_02-15-12
  done: false
  episode_len_mean: 382.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1566
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5380946000417073
          entropy_coeff: 0.009999999999999998
          kl: 0.013186200545019844
          policy_loss: -0.09857877408050829
          total_loss: -0.10392305126620663
          vf_explained_var: 0.01736128143966198
          vf_loss: 2.3395256963946546e-05
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 574000
  iterations_since_restore: 574
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,574,13856.9,574000,0,0,0,382.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-10-09_02-15-27
  done: false
  episode_len_mean: 387.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1568
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.0785884062449138
          entropy_coeff: 0.009999999999999998
          kl: 0.009655914361515746
          policy_loss: -0.03291475160254372
          total_loss: -0.03635940452416738
          vf_explained_var: 0.23668789863586426
          vf_loss: 8.769481610215735e-06
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 575000
  iterations_since_restore: 575
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,575,13871.2,575000,0,0,0,387.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-10-09_02-15-45
  done: false
  episode_len_mean: 390.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1570
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.442619220415751
          entropy_coeff: 0.009999999999999998
          kl: 0.012363249305513137
          policy_loss: -0.09654207858774397
          total_loss: -0.10156728554930952
          vf_explained_var: 0.16930754482746124
          vf_loss: 1.2643978485963696e-05
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 576000
  iterations_since_restore: 576
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,576,13889.8,576000,0,0,0,390.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-10-09_02-16-09
  done: false
  episode_len_mean: 391.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1573
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.3228811588552263
          entropy_coeff: 0.009999999999999998
          kl: 0.011449897373974711
          policy_loss: -0.0191811780548758
          total_loss: -0.023701114373074636
          vf_explained_var: -0.20643028616905212
          vf_loss: 1.411074037302266e-05
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 577000
  iterations_since_restore: 577
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,577,13913.9,577000,0,0,0,391.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-10-09_02-16-28
  done: false
  episode_len_mean: 393.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1575
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.606743990050422
          entropy_coeff: 0.009999999999999998
          kl: 0.014532835954699043
          policy_loss: -0.04511213248802556
          total_loss: -0.050123770079678956
          vf_explained_var: -0.10720177739858627
          vf_loss: 1.9928662166219307e-05
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 578000
  iterations_since_restore: 578
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,578,13932.6,578000,0,0,0,393.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-10-09_02-16-55
  done: false
  episode_len_mean: 394.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1579
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5150191413031684
          entropy_coeff: 0.009999999999999998
          kl: 0.0122823565160108
          policy_loss: -0.0878829303301043
          total_loss: -0.09369588324593173
          vf_explained_var: -0.4381535053253174
          vf_loss: 1.0326563531432638e-05
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 579000
  iterations_since_restore: 579
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,579,13959.3,579000,0,0,0,394.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-10-09_02-17-21
  done: false
  episode_len_mean: 389.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1582
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.4558784630563524
          entropy_coeff: 0.009999999999999998
          kl: 0.011340396664154302
          policy_loss: -0.051612725978096324
          total_loss: -0.05755370569725831
          vf_explained_var: -0.47958725690841675
          vf_loss: 6.190754485639142e-06
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 580000
  iterations_since_restore: 580
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,580,13985.9,580000,0,0,0,389.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-10-09_02-17-45
  done: false
  episode_len_mean: 390.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1585
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6938792043262059
          entropy_coeff: 0.009999999999999998
          kl: 0.00920072818951133
          policy_loss: -0.05393012629614936
          total_loss: -0.06386985062725015
          vf_explained_var: -0.30782631039619446
          vf_loss: 1.2261979266744068e-05
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 581000
  iterations_since_restore: 581
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,581,14009.2,581000,0,0,0,390.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-10-09_02-18-14
  done: false
  episode_len_mean: 385.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1588
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.0685842951138815
          entropy_coeff: 0.009999999999999998
          kl: 0.007870288523656694
          policy_loss: 0.007146955271148019
          total_loss: 0.002683882663647334
          vf_explained_var: -0.8718746900558472
          vf_loss: 0.0002462718399985735
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 582000
  iterations_since_restore: 582
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,582,14038.8,582000,0,0,0,385.09




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-10-09_02-18-54
  done: false
  episode_len_mean: 383.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1591
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5779288861486647
          entropy_coeff: 0.009999999999999998
          kl: 0.012570471106789287
          policy_loss: -0.030832677831252417
          total_loss: -0.037045135680172175
          vf_explained_var: -0.5884190201759338
          vf_loss: 2.113093465696794e-05
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 583000
  iterations_since_restore: 583
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,583,14078.5,583000,0,0,0,383.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-10-09_02-19-22
  done: false
  episode_len_mean: 384.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1594
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5916322098837958
          entropy_coeff: 0.009999999999999998
          kl: 0.009404109327554188
          policy_loss: -0.04611378444565667
          total_loss: -0.05487450791729821
          vf_explained_var: 0.34245577454566956
          vf_loss: 1.435441479568706e-05
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000
  iterations_since_restore: 584
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,584,14106.6,584000,0,0,0,384.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-10-09_02-19-45
  done: false
  episode_len_mean: 382.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1597
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7878371040026346
          entropy_coeff: 0.009999999999999998
          kl: 0.012062467928354683
          policy_loss: -0.1040776835133632
          total_loss: -0.11278581586149004
          vf_explained_var: -0.8419157266616821
          vf_loss: 1.0304859056810123e-05
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 585000
  iterations_since_restore: 585
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,585,14129.4,585000,0,0,0,382.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-10-09_02-20-05
  done: false
  episode_len_mean: 381.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1599
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.727369279331631
          entropy_coeff: 0.009999999999999998
          kl: 0.01106334848634288
          policy_loss: -0.06225492676926984
          total_loss: -0.0711231561170684
          vf_explained_var: -0.27696701884269714
          vf_loss: 4.229424073148241e-06
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 586000
  iterations_since_restore: 586
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,586,14149.4,586000,0,0,0,381.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-10-09_02-20-28
  done: false
  episode_len_mean: 383.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1602
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.353705316119724
          entropy_coeff: 0.009999999999999998
          kl: 0.01510958199614194
          policy_loss: -0.0545007794474562
          total_loss: -0.05655343751940462
          vf_explained_var: -0.3801915943622589
          vf_loss: 1.0556838929510882e-05
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 587000
  iterations_since_restore: 587
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,587,14171.8,587000,0,0,0,383.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-10-09_02-20-45
  done: false
  episode_len_mean: 386.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1604
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6677114817831251
          entropy_coeff: 0.009999999999999998
          kl: 0.011229119849577767
          policy_loss: -0.10206113569438457
          total_loss: -0.11020403313967916
          vf_explained_var: -0.9853435754776001
          vf_loss: 7.1056627373561945e-06
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 588000
  iterations_since_restore: 588
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,588,14189.3,588000,0,0,0,386.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-10-09_02-21-05
  done: false
  episode_len_mean: 389.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1606
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7503530396355522
          entropy_coeff: 0.009999999999999998
          kl: 0.013223138068380852
          policy_loss: -0.1283558963901467
          total_loss: -0.13580907703273826
          vf_explained_var: -0.5146456956863403
          vf_loss: 9.028455684124815e-06
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 589000
  iterations_since_restore: 589
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,589,14209.4,589000,0,0,0,389.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-10-09_02-21-23
  done: false
  episode_len_mean: 392.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1608
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7040555874506633
          entropy_coeff: 0.009999999999999998
          kl: 0.012742436590400391
          policy_loss: -0.09261688080926736
          total_loss: -0.09997445580859979
          vf_explained_var: -0.5232385993003845
          vf_loss: 6.6969795120207386e-06
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 590000
  iterations_since_restore: 590
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,590,14227.2,590000,0,0,0,392.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-10-09_02-21-48
  done: false
  episode_len_mean: 389.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1611
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.486534105406867
          entropy_coeff: 0.009999999999999998
          kl: 0.011409689153685015
          policy_loss: -0.10686323046684265
          total_loss: -0.1130564192103015
          vf_explained_var: -0.48147544264793396
          vf_loss: 7.91843546165991e-06
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 591000
  iterations_since_restore: 591
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,591,14251.9,591000,0,0,0,389.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-10-09_02-22-12
  done: false
  episode_len_mean: 389.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1614
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7216019617186653
          entropy_coeff: 0.009999999999999998
          kl: 0.01434643073312311
          policy_loss: -0.07599409117052952
          total_loss: -0.08230698214222988
          vf_explained_var: -0.8379843235015869
          vf_loss: 8.807046363775346e-06
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 592000
  iterations_since_restore: 592
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,592,14276.3,592000,0,0,0,389.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-10-09_02-22-33
  done: false
  episode_len_mean: 391.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1617
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.655114037460751
          entropy_coeff: 0.009999999999999998
          kl: 0.01237915464405459
          policy_loss: -0.05353513370371527
          total_loss: -0.0606794574815366
          vf_explained_var: -0.3942295014858246
          vf_loss: 6.396345005062661e-06
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 593000
  iterations_since_restore: 593
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,593,14297.5,593000,0,0,0,391.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-10-09_02-22-55
  done: false
  episode_len_mean: 389.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1619
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.652968207995097
          entropy_coeff: 0.009999999999999998
          kl: 0.016016997230716423
          policy_loss: -0.04010341606206364
          total_loss: -0.04446565678550137
          vf_explained_var: -0.12296094000339508
          vf_loss: 4.533752404414473e-06
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 594000
  iterations_since_restore: 594
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,594,14318.7,594000,0,0,0,389.66




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-10-09_02-23-31
  done: false
  episode_len_mean: 389.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1622
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7544606102837457
          entropy_coeff: 0.009999999999999998
          kl: 0.012555848912892662
          policy_loss: -0.04059472783572144
          total_loss: -0.04859870556328032
          vf_explained_var: -0.6305664777755737
          vf_loss: 6.028564828536926e-06
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 595000
  iterations_since_restore: 595
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,595,14355.6,595000,0,0,0,389.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-10-09_02-23-51
  done: false
  episode_len_mean: 391.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1624
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6916138966878256
          entropy_coeff: 0.009999999999999998
          kl: 0.020325325527939676
          policy_loss: -0.09063304877943462
          total_loss: -0.09210861797134082
          vf_explained_var: -0.8551631569862366
          vf_loss: 6.027540194332283e-06
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 596000
  iterations_since_restore: 596
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,596,14375.4,596000,0,0,0,391.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-10-09_02-24-10
  done: false
  episode_len_mean: 390.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1626
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.835620137055715
          entropy_coeff: 0.009999999999999998
          kl: 0.008928086499949117
          policy_loss: -0.1326440465533071
          total_loss: -0.14082445299459828
          vf_explained_var: -0.6209739446640015
          vf_loss: 6.147619999157743e-06
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 597000
  iterations_since_restore: 597
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,597,14394.1,597000,0,0,0,390.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-10-09_02-24-35
  done: false
  episode_len_mean: 387.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1629
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7412639021873475
          entropy_coeff: 0.009999999999999998
          kl: 0.008699670980734374
          policy_loss: -0.029098178984390363
          total_loss: -0.03659786987635824
          vf_explained_var: -0.5004902482032776
          vf_loss: 3.4783694117221686e-06
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000
  iterations_since_restore: 598
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,598,14418.9,598000,0,0,0,387.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-10-09_02-24-55
  done: false
  episode_len_mean: 387.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1632
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6951674673292372
          entropy_coeff: 0.009999999999999998
          kl: 0.012075797293455457
          policy_loss: -0.08279592775636249
          total_loss: -0.08598259588082631
          vf_explained_var: -0.23687438666820526
          vf_loss: 9.917677922051855e-06
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 599000
  iterations_since_restore: 599
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,599,14439.2,599000,0,0,0,387.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-10-09_02-25-15
  done: false
  episode_len_mean: 388.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1634
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6454181406233046
          entropy_coeff: 0.009999999999999998
          kl: 0.00937308782753795
          policy_loss: -0.04568516498224603
          total_loss: -0.051434759112695856
          vf_explained_var: -0.19206097722053528
          vf_loss: 2.805470087170963e-05
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600000
  iterations_since_restore: 600
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,600,14459,600000,0,0,0,388.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-10-09_02-25-33
  done: false
  episode_len_mean: 387.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1636
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6798663112852308
          entropy_coeff: 0.009999999999999998
          kl: 0.00938914536831076
          policy_loss: -0.05209310830881198
          total_loss: -0.05819344359139601
          vf_explained_var: -0.6622665524482727
          vf_loss: 3.5039167275701606e-06
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 601000
  iterations_since_restore: 601
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,601,14477,601000,0,0,0,387.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-10-09_02-25-53
  done: false
  episode_len_mean: 388.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1638
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8009124173058404
          entropy_coeff: 0.009999999999999998
          kl: 0.010971765364644644
          policy_loss: -0.1083044512818257
          total_loss: -0.1138005325363742
          vf_explained_var: -0.8365920186042786
          vf_loss: 1.551678468179792e-05
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 602000
  iterations_since_restore: 602
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,602,14496.7,602000,0,0,0,388.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-10-09_02-26-14
  done: false
  episode_len_mean: 390.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1641
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7678934362199572
          entropy_coeff: 0.009999999999999998
          kl: 0.009008882686417724
          policy_loss: -0.1181544892075989
          total_loss: -0.12556593529880047
          vf_explained_var: -0.8499259352684021
          vf_loss: 5.808014605514068e-06
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 603000
  iterations_since_restore: 603
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,603,14518.2,603000,0,0,0,390.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-10-09_02-26-35
  done: false
  episode_len_mean: 393.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1644
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.80310568412145
          entropy_coeff: 0.009999999999999998
          kl: 0.008620577492790287
          policy_loss: -0.09141800697478983
          total_loss: -0.09962446370886432
          vf_explained_var: -0.7561880946159363
          vf_loss: 5.221608768756091e-06
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 604000
  iterations_since_restore: 604
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,604,14539,604000,0,0,0,393.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-10-09_02-26-56
  done: false
  episode_len_mean: 391.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1646
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5314434263441297
          entropy_coeff: 0.009999999999999998
          kl: 0.01040732365410116
          policy_loss: -0.09548536667393313
          total_loss: -0.09893927002946536
          vf_explained_var: -0.14270485937595367
          vf_loss: 5.936892593025631e-06
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 605000
  iterations_since_restore: 605
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,605,14559.7,605000,0,0,0,391.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-10-09_02-27-17
  done: false
  episode_len_mean: 392.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1649
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5176064954863655
          entropy_coeff: 0.009999999999999998
          kl: 0.007976841011542233
          policy_loss: -0.08565302215930488
          total_loss: -0.09173960611224174
          vf_explained_var: -0.7934626340866089
          vf_loss: 3.361407233468425e-06
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 606000
  iterations_since_restore: 606
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,606,14580.5,606000,0,0,0,392.47




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-10-09_02-27-56
  done: false
  episode_len_mean: 391.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1651
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5591128720177545
          entropy_coeff: 0.009999999999999998
          kl: 0.010361304560369591
          policy_loss: -0.08889675941318273
          total_loss: -0.09267960755775372
          vf_explained_var: -0.8304995894432068
          vf_loss: 6.1070999373037515e-06
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 607000
  iterations_since_restore: 607
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,607,14620.1,607000,0,0,0,391.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-10-09_02-28-17
  done: false
  episode_len_mean: 391.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1654
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2982496579488119
          entropy_coeff: 0.009999999999999998
          kl: 0.010627738952370553
          policy_loss: -0.07529322256644568
          total_loss: -0.0761639086322652
          vf_explained_var: -0.6888225078582764
          vf_loss: 6.152553599905433e-06
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 608000
  iterations_since_restore: 608
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,608,14641.1,608000,0,0,0,391.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-10-09_02-28-43
  done: false
  episode_len_mean: 392.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1657
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5165651414129468
          entropy_coeff: 0.009999999999999998
          kl: 0.009794498726652624
          policy_loss: -0.12213801689859893
          total_loss: -0.12602094223515856
          vf_explained_var: -0.8019870519638062
          vf_loss: 0.00012617930690339563
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 609000
  iterations_since_restore: 609
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,609,14666.7,609000,0,0,0,392.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-10-09_02-29-07
  done: false
  episode_len_mean: 390.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1660
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.822493847211202
          entropy_coeff: 0.009999999999999998
          kl: 0.011969979307174741
          policy_loss: -0.07790593595968352
          total_loss: -0.08249066736963061
          vf_explained_var: -0.31092074513435364
          vf_loss: 5.6533547043121265e-06
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 610000
  iterations_since_restore: 610
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,610,14690.6,610000,0,0,0,390.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-10-09_02-29-28
  done: false
  episode_len_mean: 388.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1662
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.433540905184216
          entropy_coeff: 0.009999999999999998
          kl: 0.009943202743107222
          policy_loss: -0.027433401097853978
          total_loss: -0.03030045943127738
          vf_explained_var: -0.39692455530166626
          vf_loss: 0.00014241912087729563
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 611000
  iterations_since_restore: 611
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,611,14711.5,611000,0,0,0,388.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-10-09_02-29-45
  done: false
  episode_len_mean: 388.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1664
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7145848565631443
          entropy_coeff: 0.009999999999999998
          kl: 0.011197318112628609
          policy_loss: -0.1278840456985765
          total_loss: -0.13226520733700858
          vf_explained_var: 0.006919683422893286
          vf_loss: 1.0241694457767558e-05
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 612000
  iterations_since_restore: 612
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,612,14729.2,612000,0,0,0,388.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-10-09_02-30-05
  done: false
  episode_len_mean: 389.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1667
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6381418095694649
          entropy_coeff: 0.009999999999999998
          kl: 0.007898995255394642
          policy_loss: -0.1379736673914724
          total_loss: -0.14535282748854822
          vf_explained_var: -0.19305968284606934
          vf_loss: 4.806196703712986e-06
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 613000
  iterations_since_restore: 613
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,613,14749.3,613000,0,0,0,389.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-10-09_02-30-28
  done: false
  episode_len_mean: 384.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1669
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.733050541083018
          entropy_coeff: 0.009999999999999998
          kl: 0.012695161062176238
          policy_loss: -0.12546798014599417
          total_loss: -0.12833336691061656
          vf_explained_var: -0.6922754645347595
          vf_loss: 4.535398049180609e-06
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 614000
  iterations_since_restore: 614
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,614,14771.9,614000,0,0,0,384.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-10-09_02-30-49
  done: false
  episode_len_mean: 385.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1672
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6772461705737645
          entropy_coeff: 0.009999999999999998
          kl: 0.01027474705696747
          policy_loss: -0.07965024076402187
          total_loss: -0.08471593856811524
          vf_explained_var: 0.2879345715045929
          vf_loss: 3.186101482949905e-06
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000
  iterations_since_restore: 615
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,615,14793,615000,0,0,0,385.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-10-09_02-31-11
  done: false
  episode_len_mean: 386.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1674
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6837672604454887
          entropy_coeff: 0.009999999999999998
          kl: 0.010170492288142994
          policy_loss: -0.08880167702833812
          total_loss: -0.0940506310098701
          vf_explained_var: -0.08311586827039719
          vf_loss: 3.889558244433526e-06
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000
  iterations_since_restore: 616
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,616,14814.6,616000,0,0,0,386.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-10-09_02-31-34
  done: false
  episode_len_mean: 385.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1677
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.57260299788581
          entropy_coeff: 0.009999999999999998
          kl: 0.012352565312086878
          policy_loss: -0.0043876800479160415
          total_loss: -0.006038275650805897
          vf_explained_var: -0.566616415977478
          vf_loss: 5.090049119694918e-06
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 617000
  iterations_since_restore: 617
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,617,14837.6,617000,0,0,0,385.27




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-10-09_02-32-14
  done: false
  episode_len_mean: 384.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1680
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4755751397874621
          entropy_coeff: 0.009999999999999998
          kl: 0.0063882413806351694
          policy_loss: -0.11629126667976379
          total_loss: -0.1237644311454561
          vf_explained_var: -0.39390993118286133
          vf_loss: 5.979692367772157e-06
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 618000
  iterations_since_restore: 618
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,618,14877.8,618000,0,0,0,384.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-10-09_02-32-41
  done: false
  episode_len_mean: 385.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1683
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5985310832659403
          entropy_coeff: 0.009999999999999998
          kl: 0.008275284961645389
          policy_loss: 0.026836847845051023
          total_loss: 0.020282403007149695
          vf_explained_var: 0.2086239755153656
          vf_loss: 4.801531982442005e-06
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 619000
  iterations_since_restore: 619
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,619,14904.7,619000,0,0,0,385.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-10-09_02-33-09
  done: false
  episode_len_mean: 385.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1686
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5161228206422594
          entropy_coeff: 0.009999999999999998
          kl: 0.011529936219079174
          policy_loss: -0.06409359880619579
          total_loss: -0.06607026093535953
          vf_explained_var: -0.8114820122718811
          vf_loss: 5.124239918485627e-05
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000
  iterations_since_restore: 620
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,620,14932.8,620000,0,0,0,385.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-10-09_02-33-34
  done: false
  episode_len_mean: 386.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1689
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.115758228302002
          entropy_coeff: 0.009999999999999998
          kl: 0.009277716451881801
          policy_loss: -0.06791677508089278
          total_loss: -0.06841974374320772
          vf_explained_var: -0.03428147733211517
          vf_loss: 8.67147796624421e-05
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 621000
  iterations_since_restore: 621
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,621,14958,621000,0,0,0,386.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-10-09_02-33-57
  done: false
  episode_len_mean: 386.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1692
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7124952263302273
          entropy_coeff: 0.009999999999999998
          kl: 0.010093905321031115
          policy_loss: -0.04822932134071986
          total_loss: -0.053846224314636655
          vf_explained_var: 0.4710402488708496
          vf_loss: 1.0461754949473188e-05
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000
  iterations_since_restore: 622
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,622,14981,622000,0,0,0,386.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-10-09_02-34-18
  done: false
  episode_len_mean: 389.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1694
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7283457226223415
          entropy_coeff: 0.009999999999999998
          kl: 0.010022543677008361
          policy_loss: -0.052460816212826306
          total_loss: -0.05832359517614047
          vf_explained_var: 0.06587829440832138
          vf_loss: 4.372326932550802e-06
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000
  iterations_since_restore: 623
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,623,15002,623000,0,0,0,389.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-10-09_02-34-38
  done: false
  episode_len_mean: 391.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1696
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5771699839168125
          entropy_coeff: 0.009999999999999998
          kl: 0.010637858083177399
          policy_loss: -0.04741627246969276
          total_loss: -0.051064762307537925
          vf_explained_var: -0.568203330039978
          vf_loss: 6.0261252732137414e-06
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 624000
  iterations_since_restore: 624
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,624,15021.4,624000,0,0,0,391.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-10-09_02-34-58
  done: false
  episode_len_mean: 393.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1699
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8688919464747111
          entropy_coeff: 0.009999999999999998
          kl: 0.00849483851664757
          policy_loss: -0.09710579156461689
          total_loss: -0.10611471095018916
          vf_explained_var: -0.09896843880414963
          vf_loss: 3.848038804537484e-06
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 625000
  iterations_since_restore: 625
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,625,15041.2,625000,0,0,0,393.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-10-09_02-35-17
  done: false
  episode_len_mean: 392.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1701
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.640789642598894
          entropy_coeff: 0.009999999999999998
          kl: 0.007872445997590146
          policy_loss: -0.1402031378613578
          total_loss: -0.14764069037304983
          vf_explained_var: -0.8453019857406616
          vf_loss: 3.1345224354279507e-06
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 626000
  iterations_since_restore: 626
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,626,15060.7,626000,0,0,0,392.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-10-09_02-35-41
  done: false
  episode_len_mean: 391.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1704
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6629636896981133
          entropy_coeff: 0.009999999999999998
          kl: 0.009494256687064408
          policy_loss: -0.06777724004868003
          total_loss: -0.07322836338231961
          vf_explained_var: -0.9307692050933838
          vf_loss: 0.00036395983287952226
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000
  iterations_since_restore: 627
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,627,15084.7,627000,0,0,0,391.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-10-09_02-36-03
  done: false
  episode_len_mean: 391.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1706
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4263485027684106
          entropy_coeff: 0.009999999999999998
          kl: 0.010236899079161373
          policy_loss: -0.13798318298326598
          total_loss: -0.14057665690779686
          vf_explained_var: 0.1161528080701828
          vf_loss: 9.539248777754336e-06
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 628000
  iterations_since_restore: 628
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,628,15106.3,628000,0,0,0,391.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-10-09_02-36-25
  done: false
  episode_len_mean: 389.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1709
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6156831105550131
          entropy_coeff: 0.009999999999999998
          kl: 0.010145427631054183
          policy_loss: -0.05767307885819011
          total_loss: -0.062268419274025495
          vf_explained_var: -0.07836730033159256
          vf_loss: 5.2124183639534746e-06
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000
  iterations_since_restore: 629
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,629,15128,629000,0,0,0,389.18




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-10-09_02-37-01
  done: false
  episode_len_mean: 390.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1711
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9337976124551561
          entropy_coeff: 0.009999999999999998
          kl: 0.00984092467321024
          policy_loss: -0.0509777611742417
          total_loss: -0.05910362878607379
          vf_explained_var: -0.4123597741127014
          vf_loss: 2.6796357398476783e-06
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 630000
  iterations_since_restore: 630
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,630,15164.7,630000,0,0,0,390.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-10-09_02-37-21
  done: false
  episode_len_mean: 391.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1714
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6604089246855842
          entropy_coeff: 0.009999999999999998
          kl: 0.011274026459397193
          policy_loss: -0.1535636730492115
          total_loss: -0.15732352286577225
          vf_explained_var: -0.05562308803200722
          vf_loss: 2.415997131619305e-06
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631000
  iterations_since_restore: 631
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,631,15184.9,631000,0,0,0,391.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-10-09_02-37-45
  done: false
  episode_len_mean: 392.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1716
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8634047945340475
          entropy_coeff: 0.009999999999999998
          kl: 0.011153880855537639
          policy_loss: -0.07263481958458821
          total_loss: -0.07856098732186688
          vf_explained_var: -0.36538636684417725
          vf_loss: 2.9120829291665964e-06
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000
  iterations_since_restore: 632
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,632,15208.2,632000,0,0,0,392.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-10-09_02-38-03
  done: false
  episode_len_mean: 393.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1719
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7282640324698555
          entropy_coeff: 0.009999999999999998
          kl: 0.012256773428250729
          policy_loss: -0.08287627966039711
          total_loss: -0.08619450889527798
          vf_explained_var: -0.7211042046546936
          vf_loss: 3.177639001074163e-06
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000
  iterations_since_restore: 633
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,633,15226.8,633000,0,0,0,393.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-10-09_02-38-22
  done: false
  episode_len_mean: 393.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1721
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3557765318287744
          entropy_coeff: 0.009999999999999998
          kl: 0.0069426336559401764
          policy_loss: -0.00021556814511617024
          total_loss: -0.005862859595153067
          vf_explained_var: 0.21873050928115845
          vf_loss: 2.380684807374362e-06
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 634000
  iterations_since_restore: 634
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,634,15245.6,634000,0,0,0,393.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-10-09_02-38-41
  done: false
  episode_len_mean: 393.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1723
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7840026444858974
          entropy_coeff: 0.009999999999999998
          kl: 0.010138508530990917
          policy_loss: -0.044276497926976945
          total_loss: -0.05056504822439618
          vf_explained_var: 0.18225066363811493
          vf_loss: 3.08076337079102e-06
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000
  iterations_since_restore: 635
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,635,15264.7,635000,0,0,0,393.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-10-09_02-39-00
  done: false
  episode_len_mean: 392.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1726
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7934981438848707
          entropy_coeff: 0.009999999999999998
          kl: 0.00957604234113069
          policy_loss: -0.041486560097999045
          total_loss: -0.04851071341998047
          vf_explained_var: -0.3548753559589386
          vf_loss: 3.1181594320312596e-06
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000
  iterations_since_restore: 636
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,636,15283.7,636000,0,0,0,392.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-10-09_02-39-19
  done: false
  episode_len_mean: 394.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1728
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.824682190683153
          entropy_coeff: 0.009999999999999998
          kl: 0.011086951918354368
          policy_loss: -0.06418713393310706
          total_loss: -0.0698036511739095
          vf_explained_var: -0.5914075374603271
          vf_loss: 1.5749209189329526e-06
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000
  iterations_since_restore: 637
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,637,15301.9,637000,0,0,0,394.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-10-09_02-39-39
  done: false
  episode_len_mean: 395.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1730
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.568673712015152
          entropy_coeff: 0.009999999999999998
          kl: 0.008905887364985886
          policy_loss: -0.061577654505769415
          total_loss: -0.06711846481387813
          vf_explained_var: -0.3139599561691284
          vf_loss: 1.5654583352089603e-06
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000
  iterations_since_restore: 638
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,638,15322.5,638000,0,0,0,395.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-10-09_02-40-00
  done: false
  episode_len_mean: 394.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1733
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7785595522986517
          entropy_coeff: 0.009999999999999998
          kl: 0.008588560373750376
          policy_loss: -0.12160008511402541
          total_loss: -0.12960056871589687
          vf_explained_var: -0.8369750380516052
          vf_loss: 2.204527101589014e-06
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639000
  iterations_since_restore: 639
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,639,15343.1,639000,0,0,0,394.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-10-09_02-40-23
  done: false
  episode_len_mean: 392.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1736
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9292564630508422
          entropy_coeff: 0.009999999999999998
          kl: 0.013410639460631904
          policy_loss: -0.09146277560955948
          total_loss: -0.09547859090897771
          vf_explained_var: -0.34463605284690857
          vf_loss: 1.1919002916884994e-06
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000
  iterations_since_restore: 640
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,640,15365.9,640000,0,0,0,392.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-10-09_02-40-40
  done: false
  episode_len_mean: 393.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1738
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6340547283490499
          entropy_coeff: 0.009999999999999998
          kl: 0.010438625275100557
          policy_loss: -0.07581599089834425
          total_loss: -0.08026483414901628
          vf_explained_var: -0.03371727839112282
          vf_loss: 1.4562293510304396e-06
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 641000
  iterations_since_restore: 641
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,641,15383.8,641000,0,0,0,393.42




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-10-09_02-41-20
  done: false
  episode_len_mean: 391.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1740
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6394035776456197
          entropy_coeff: 0.009999999999999998
          kl: 0.007327601934839656
          policy_loss: 0.009160988653699556
          total_loss: 0.001115544078250726
          vf_explained_var: -0.05840006843209267
          vf_loss: 1.9946048957485294e-06
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 642000
  iterations_since_restore: 642
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,642,15423.1,642000,0,0,0,391.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-10-09_02-41-42
  done: false
  episode_len_mean: 392.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1743
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4786200457149081
          entropy_coeff: 0.009999999999999998
          kl: 0.011040795087544556
          policy_loss: -0.13367128061751524
          total_loss: -0.13587860469188956
          vf_explained_var: 0.32092514634132385
          vf_loss: 2.717666324593261e-06
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 643000
  iterations_since_restore: 643
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,643,15445,643000,0,0,0,392.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-10-09_02-42-03
  done: false
  episode_len_mean: 392.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1745
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7114587412940132
          entropy_coeff: 0.009999999999999998
          kl: 0.010845778473889725
          policy_loss: -0.09871264066961076
          total_loss: -0.10342027329736286
          vf_explained_var: -0.9844067096710205
          vf_loss: 5.2935626873325724e-05
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 644000
  iterations_since_restore: 644
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,644,15466.2,644000,0,0,0,392.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-10-09_02-42-24
  done: false
  episode_len_mean: 390.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1748
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6754093647003174
          entropy_coeff: 0.009999999999999998
          kl: 0.010585635972685263
          policy_loss: -0.0550699678560098
          total_loss: -0.059756319224834445
          vf_explained_var: -0.044790979474782944
          vf_loss: 1.0041922947342148e-05
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 645000
  iterations_since_restore: 645
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,645,15487.5,645000,0,0,0,390.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-10-09_02-42-47
  done: false
  episode_len_mean: 391.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1751
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2977166851361592
          entropy_coeff: 0.009999999999999998
          kl: 0.008626709395659462
          policy_loss: -0.1005651530292299
          total_loss: -0.10371372000210816
          vf_explained_var: 0.010128336027264595
          vf_loss: 2.237222365566696e-06
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 646000
  iterations_since_restore: 646
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,646,15510.5,646000,0,0,0,391.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-10-09_02-43-05
  done: false
  episode_len_mean: 393.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1753
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5517653942108154
          entropy_coeff: 0.009999999999999998
          kl: 0.011134927405117918
          policy_loss: -0.1273666558166345
          total_loss: -0.13019694143699276
          vf_explained_var: 0.42558813095092773
          vf_loss: 3.992342020511488e-06
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 647000
  iterations_since_restore: 647
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,647,15528.1,647000,0,0,0,393.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-10-09_02-43-24
  done: false
  episode_len_mean: 393.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1755
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.753909202416738
          entropy_coeff: 0.009999999999999998
          kl: 0.010837289389109057
          policy_loss: -0.20799737866553997
          total_loss: -0.2131897230943044
          vf_explained_var: -0.09140671789646149
          vf_loss: 2.399626021087897e-06
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 648000
  iterations_since_restore: 648
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,648,15547.5,648000,0,0,0,393.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 649000
  custom_metrics: {}
  date: 2021-10-09_02-43-48
  done: false
  episode_len_mean: 393.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1758
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6903135418891906
          entropy_coeff: 0.009999999999999998
          kl: 0.008926035090664482
          policy_loss: -0.11310380436480046
          total_loss: -0.11983793729709255
          vf_explained_var: -0.5711444020271301
          vf_loss: 1.6883591513305874e-06
    num_agent_steps_sampled: 649000
    num_agent_steps_trained: 649000
    num_steps_sampled: 649000
    num_steps_trained: 649000
  iterations_since_restore: 649
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,649,15571.4,649000,0,0,0,393.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 650000
  custom_metrics: {}
  date: 2021-10-09_02-44-08
  done: false
  episode_len_mean: 394.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1761
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7186572697427538
          entropy_coeff: 0.009999999999999998
          kl: 0.009275048829429621
          policy_loss: -0.0755198890550269
          total_loss: -0.0821372719688548
          vf_explained_var: -0.40414440631866455
          vf_loss: 4.328474608428223e-06
    num_agent_steps_sampled: 650000
    num_agent_steps_trained: 650000
    num_steps_sampled: 650000
    num_steps_trained: 650000
  iterations_since_restore: 650
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,650,15591.5,650000,0,0,0,394.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 651000
  custom_metrics: {}
  date: 2021-10-09_02-44-29
  done: false
  episode_len_mean: 395.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1763
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6860315256648593
          entropy_coeff: 0.009999999999999998
          kl: 0.010293910231384176
          policy_loss: -0.15743002448644902
          total_loss: -0.1625628819482194
          vf_explained_var: -0.26921477913856506
          vf_loss: 2.0515686868647106e-06
    num_agent_steps_sampled: 651000
    num_agent_steps_trained: 651000
    num_steps_sampled: 651000
    num_steps_trained: 651000
  iterations_since_restore: 651
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,651,15612.3,651000,0,0,0,395.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 652000
  custom_metrics: {}
  date: 2021-10-09_02-44-51
  done: false
  episode_len_mean: 391.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1766
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7561980300479465
          entropy_coeff: 0.009999999999999998
          kl: 0.011657473913339286
          policy_loss: -0.05462792971067958
          total_loss: -0.05890883414281739
          vf_explained_var: -0.25138792395591736
          vf_loss: 2.4839466410260985e-06
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_steps_sampled: 652000
    num_steps_trained: 652000
  iterations_since_restore: 652
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,652,15633.8,652000,0,0,0,391.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 653000
  custom_metrics: {}
  date: 2021-10-09_02-45-11
  done: false
  episode_len_mean: 392.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1769
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6555556588702731
          entropy_coeff: 0.009999999999999998
          kl: 0.010541353869193672
          policy_loss: -0.07385585001773304
          total_loss: -0.07839990854263305
          vf_explained_var: -0.4263138175010681
          vf_loss: 4.236760910064024e-06
    num_agent_steps_sampled: 653000
    num_agent_steps_trained: 653000
    num_steps_sampled: 653000
    num_steps_trained: 653000
  iterations_since_restore: 653
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,653,15654.2,653000,0,0,0,392.68




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 654000
  custom_metrics: {}
  date: 2021-10-09_02-45-52
  done: false
  episode_len_mean: 391.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1771
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.711544132232666
          entropy_coeff: 0.009999999999999998
          kl: 0.009840855914529614
          policy_loss: -0.1294195185829368
          total_loss: -0.13532236949023274
          vf_explained_var: -0.47968995571136475
          vf_loss: 3.2389974225882017e-06
    num_agent_steps_sampled: 654000
    num_agent_steps_trained: 654000
    num_steps_sampled: 654000
    num_steps_trained: 654000
  iterations_since_restore: 654
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,654,15694.7,654000,0,0,0,391.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 655000
  custom_metrics: {}
  date: 2021-10-09_02-46-10
  done: false
  episode_len_mean: 390.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1774
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8598845521608989
          entropy_coeff: 0.009999999999999998
          kl: 0.008991517147544216
          policy_loss: -0.00545366501642598
          total_loss: -0.013809635821315978
          vf_explained_var: -0.42576953768730164
          vf_loss: 9.737357800077007e-07
    num_agent_steps_sampled: 655000
    num_agent_steps_trained: 655000
    num_steps_sampled: 655000
    num_steps_trained: 655000
  iterations_since_restore: 655
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,655,15713.2,655000,0,0,0,390.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 656000
  custom_metrics: {}
  date: 2021-10-09_02-46-32
  done: false
  episode_len_mean: 392.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1776
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.47441027296914
          entropy_coeff: 0.009999999999999998
          kl: 0.00840961804603416
          policy_loss: -0.10166726164105866
          total_loss: -0.10633758126447598
          vf_explained_var: -1.0
          vf_loss: 0.0004946985702822026
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_steps_sampled: 656000
    num_steps_trained: 656000
  iterations_since_restore: 656
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,656,15734.7,656000,0,0,0,392.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 657000
  custom_metrics: {}
  date: 2021-10-09_02-46-54
  done: false
  episode_len_mean: 393.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1779
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.627231866783566
          entropy_coeff: 0.009999999999999998
          kl: 0.011682016858042557
          policy_loss: -0.10641879667010572
          total_loss: -0.10937935751345422
          vf_explained_var: -0.5464821457862854
          vf_loss: 5.209485299120489e-06
    num_agent_steps_sampled: 657000
    num_agent_steps_trained: 657000
    num_steps_sampled: 657000
    num_steps_trained: 657000
  iterations_since_restore: 657
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,657,15757.4,657000,0,0,0,393.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 658000
  custom_metrics: {}
  date: 2021-10-09_02-47-14
  done: false
  episode_len_mean: 394.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1781
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9722902827792697
          entropy_coeff: 0.009999999999999998
          kl: 0.008725799157637621
          policy_loss: -0.06075512187348472
          total_loss: -0.07053744080993864
          vf_explained_var: -0.3119673728942871
          vf_loss: 1.3533121388162626e-06
    num_agent_steps_sampled: 658000
    num_agent_steps_trained: 658000
    num_steps_sampled: 658000
    num_steps_trained: 658000
  iterations_since_restore: 658
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,658,15777.4,658000,0,0,0,394.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 659000
  custom_metrics: {}
  date: 2021-10-09_02-47-35
  done: false
  episode_len_mean: 396.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1784
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.572372892167833
          entropy_coeff: 0.009999999999999998
          kl: 0.009586765417594694
          policy_loss: -0.1036219749185774
          total_loss: -0.10842479492227236
          vf_explained_var: -0.7561138868331909
          vf_loss: 9.836738993524503e-07
    num_agent_steps_sampled: 659000
    num_agent_steps_trained: 659000
    num_steps_sampled: 659000
    num_steps_trained: 659000
  iterations_since_restore: 659
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,659,15798.4,659000,0,0,0,396.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 660000
  custom_metrics: {}
  date: 2021-10-09_02-47-57
  done: false
  episode_len_mean: 398.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1787
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7961475756433276
          entropy_coeff: 0.009999999999999998
          kl: 0.008938161348867506
          policy_loss: -0.04016741145816114
          total_loss: -0.04794626987228791
          vf_explained_var: -0.847322404384613
          vf_loss: 1.4929436196729917e-06
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_steps_sampled: 660000
    num_steps_trained: 660000
  iterations_since_restore: 660
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,660,15820.3,660000,0,0,0,398.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 661000
  custom_metrics: {}
  date: 2021-10-09_02-48-22
  done: false
  episode_len_mean: 397.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1790
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6942921055687798
          entropy_coeff: 0.009999999999999998
          kl: 0.0102612310793174
          policy_loss: -0.06609373423788283
          total_loss: -0.07134755911926428
          vf_explained_var: -0.6921029090881348
          vf_loss: 9.145584764761831e-07
    num_agent_steps_sampled: 661000
    num_agent_steps_trained: 661000
    num_steps_sampled: 661000
    num_steps_trained: 661000
  iterations_since_restore: 661
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,661,15844.7,661000,0,0,0,397.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 662000
  custom_metrics: {}
  date: 2021-10-09_02-48-43
  done: false
  episode_len_mean: 398.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1792
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6562342166900634
          entropy_coeff: 0.009999999999999998
          kl: 0.010619022399723713
          policy_loss: -0.09378816270165974
          total_loss: -0.098252129720317
          vf_explained_var: 0.0031446581706404686
          vf_loss: 2.6447362579852374e-06
    num_agent_steps_sampled: 662000
    num_agent_steps_trained: 662000
    num_steps_sampled: 662000
    num_steps_trained: 662000
  iterations_since_restore: 662
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,662,15865.8,662000,0,0,0,398.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 663000
  custom_metrics: {}
  date: 2021-10-09_02-49-08
  done: false
  episode_len_mean: 396.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1795
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6477523975902133
          entropy_coeff: 0.009999999999999998
          kl: 0.00899459996553544
          policy_loss: -0.06310085194806257
          total_loss: -0.06933191041979525
          vf_explained_var: -0.7649339437484741
          vf_loss: 1.0525682941735997e-06
    num_agent_steps_sampled: 663000
    num_agent_steps_trained: 663000
    num_steps_sampled: 663000
    num_steps_trained: 663000
  iterations_since_restore: 663
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,663,15891,663000,0,0,0,396.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 664000
  custom_metrics: {}
  date: 2021-10-09_02-49-31
  done: false
  episode_len_mean: 393.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1798
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.1977934506204393
          entropy_coeff: 0.009999999999999998
          kl: 0.011203970905830276
          policy_loss: -0.05944203399121761
          total_loss: -0.05865630689594481
          vf_explained_var: -0.8818566799163818
          vf_loss: 1.639136046883828e-06
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_steps_sampled: 664000
    num_steps_trained: 664000
  iterations_since_restore: 664
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,664,15914,664000,0,0,0,393.14




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 665000
  custom_metrics: {}
  date: 2021-10-09_02-50-09
  done: false
  episode_len_mean: 391.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1801
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4737310316827563
          entropy_coeff: 0.009999999999999998
          kl: 0.010637645071119155
          policy_loss: -0.08798311104377111
          total_loss: -0.09060147437784406
          vf_explained_var: -0.35137224197387695
          vf_loss: 2.0012047230011374e-06
    num_agent_steps_sampled: 665000
    num_agent_steps_trained: 665000
    num_steps_sampled: 665000
    num_steps_trained: 665000
  iterations_since_restore: 665
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,665,15952.2,665000,0,0,0,391.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 666000
  custom_metrics: {}
  date: 2021-10-09_02-50-31
  done: false
  episode_len_mean: 390.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1803
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5457680940628051
          entropy_coeff: 0.009999999999999998
          kl: 0.01183082337273227
          policy_loss: -0.10295317040549384
          total_loss: -0.10493337396118375
          vf_explained_var: -0.5790718197822571
          vf_loss: 1.4286558275165994e-06
    num_agent_steps_sampled: 666000
    num_agent_steps_trained: 666000
    num_steps_sampled: 666000
    num_steps_trained: 666000
  iterations_since_restore: 666
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,666,15974,666000,0,0,0,390.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 667000
  custom_metrics: {}
  date: 2021-10-09_02-50-51
  done: false
  episode_len_mean: 392.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1805
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.1674799038304222
          entropy_coeff: 0.009999999999999998
          kl: 0.008040894135666735
          policy_loss: -0.07111528294367922
          total_loss: -0.07337164709137546
          vf_explained_var: -0.9068215489387512
          vf_loss: 0.00025935148115650006
    num_agent_steps_sampled: 667000
    num_agent_steps_trained: 667000
    num_steps_sampled: 667000
    num_steps_trained: 667000
  iterations_since_restore: 667
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,667,15994.2,667000,0,0,0,392.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 668000
  custom_metrics: {}
  date: 2021-10-09_02-51-13
  done: false
  episode_len_mean: 392.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1808
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3335089074240791
          entropy_coeff: 0.009999999999999998
          kl: 0.009713135419980349
          policy_loss: -0.12466328220648898
          total_loss: -0.1269330598000023
          vf_explained_var: -0.26294755935668945
          vf_loss: 1.44174654539913e-06
    num_agent_steps_sampled: 668000
    num_agent_steps_trained: 668000
    num_steps_sampled: 668000
    num_steps_trained: 668000
  iterations_since_restore: 668
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,668,16015.5,668000,0,0,0,392.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 669000
  custom_metrics: {}
  date: 2021-10-09_02-51-36
  done: false
  episode_len_mean: 391.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1811
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.53082099225786
          entropy_coeff: 0.009999999999999998
          kl: 0.009522674450650647
          policy_loss: -0.045822682914634545
          total_loss: -0.05028308596875933
          vf_explained_var: -0.7078400254249573
          vf_loss: 8.866101394586521e-07
    num_agent_steps_sampled: 669000
    num_agent_steps_trained: 669000
    num_steps_sampled: 669000
    num_steps_trained: 669000
  iterations_since_restore: 669
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,669,16039.1,669000,0,0,0,391.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 670000
  custom_metrics: {}
  date: 2021-10-09_02-51-55
  done: false
  episode_len_mean: 393.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1813
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.1645787060260773
          entropy_coeff: 0.009999999999999998
          kl: 0.008180257539419896
          policy_loss: -0.043001653254032136
          total_loss: -0.045327618593970935
          vf_explained_var: -0.6371458768844604
          vf_loss: 1.995919896873804e-06
    num_agent_steps_sampled: 670000
    num_agent_steps_trained: 670000
    num_steps_sampled: 670000
    num_steps_trained: 670000
  iterations_since_restore: 670
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,670,16057.8,670000,0,0,0,393.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 671000
  custom_metrics: {}
  date: 2021-10-09_02-52-19
  done: false
  episode_len_mean: 391.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1816
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4985210180282593
          entropy_coeff: 0.009999999999999998
          kl: 0.011231747685388151
          policy_loss: -0.08109510358836916
          total_loss: -0.0832854178837604
          vf_explained_var: -0.40418481826782227
          vf_loss: 1.2338773046849384e-06
    num_agent_steps_sampled: 671000
    num_agent_steps_trained: 671000
    num_steps_sampled: 671000
    num_steps_trained: 671000
  iterations_since_restore: 671
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,671,16081.9,671000,0,0,0,391.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 672000
  custom_metrics: {}
  date: 2021-10-09_02-52-43
  done: false
  episode_len_mean: 389.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1819
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7123527116245694
          entropy_coeff: 0.009999999999999998
          kl: 0.010059441427999182
          policy_loss: -0.08572725144525369
          total_loss: -0.09139143102284934
          vf_explained_var: -0.7963040471076965
          vf_loss: 1.0131545006212642e-06
    num_agent_steps_sampled: 672000
    num_agent_steps_trained: 672000
    num_steps_sampled: 672000
    num_steps_trained: 672000
  iterations_since_restore: 672
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,672,16105.9,672000,0,0,0,389.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 673000
  custom_metrics: {}
  date: 2021-10-09_02-53-06
  done: false
  episode_len_mean: 388.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1821
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.379256784915924
          entropy_coeff: 0.009999999999999998
          kl: 0.008337833975127742
          policy_loss: -0.08731569366322624
          total_loss: -0.09160987246367666
          vf_explained_var: -0.8928234577178955
          vf_loss: 1.0743199330439286e-06
    num_agent_steps_sampled: 673000
    num_agent_steps_trained: 673000
    num_steps_sampled: 673000
    num_steps_trained: 673000
  iterations_since_restore: 673
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,673,16128.5,673000,0,0,0,388.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 674000
  custom_metrics: {}
  date: 2021-10-09_02-53-30
  done: false
  episode_len_mean: 386.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1824
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5595870905452305
          entropy_coeff: 0.009999999999999998
          kl: 0.009270909316628165
          policy_loss: -0.04326051957905293
          total_loss: -0.0482952072388596
          vf_explained_var: -0.21194498240947723
          vf_loss: 1.0380513104893098e-06
    num_agent_steps_sampled: 674000
    num_agent_steps_trained: 674000
    num_steps_sampled: 674000
    num_steps_trained: 674000
  iterations_since_restore: 674
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,674,16152.5,674000,0,0,0,386.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 675000
  custom_metrics: {}
  date: 2021-10-09_02-53-51
  done: false
  episode_len_mean: 384.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1827
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5865340100394354
          entropy_coeff: 0.009999999999999998
          kl: 0.007947392578226145
          policy_loss: -0.07383922804147006
          total_loss: -0.08065095712534255
          vf_explained_var: -0.5137144923210144
          vf_loss: 1.0343291311048435e-06
    num_agent_steps_sampled: 675000
    num_agent_steps_trained: 675000
    num_steps_sampled: 675000
    num_steps_trained: 675000
  iterations_since_restore: 675
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,675,16174,675000,0,0,0,384.12




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 676000
  custom_metrics: {}
  date: 2021-10-09_02-54-32
  done: false
  episode_len_mean: 382.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1830
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.452451467514038
          entropy_coeff: 0.009999999999999998
          kl: 0.009665620097576037
          policy_loss: -0.09207396534168058
          total_loss: -0.09558764567805661
          vf_explained_var: -0.7656329870223999
          vf_loss: 1.0882755387026717e-06
    num_agent_steps_sampled: 676000
    num_agent_steps_trained: 676000
    num_steps_sampled: 676000
    num_steps_trained: 676000
  iterations_since_restore: 676
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,676,16214.8,676000,0,0,0,382.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 677000
  custom_metrics: {}
  date: 2021-10-09_02-54-57
  done: false
  episode_len_mean: 380.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1833
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.547161012225681
          entropy_coeff: 0.009999999999999998
          kl: 0.010753940993343866
          policy_loss: -0.08124321877128549
          total_loss: -0.0843665912747383
          vf_explained_var: -0.7810381650924683
          vf_loss: 9.882716186666584e-05
    num_agent_steps_sampled: 677000
    num_agent_steps_trained: 677000
    num_steps_sampled: 677000
    num_steps_trained: 677000
  iterations_since_restore: 677
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,677,16240,677000,0,0,0,380.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 678000
  custom_metrics: {}
  date: 2021-10-09_02-55-18
  done: false
  episode_len_mean: 380.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1835
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5987629228168063
          entropy_coeff: 0.009999999999999998
          kl: 0.008733974110255439
          policy_loss: -0.0549921735500296
          total_loss: -0.06101950423585044
          vf_explained_var: -1.0
          vf_loss: 1.1757819457771904e-05
    num_agent_steps_sampled: 678000
    num_agent_steps_trained: 678000
    num_steps_sampled: 678000
    num_steps_trained: 678000
  iterations_since_restore: 678
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,678,16260.1,678000,0,0,0,380.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 679000
  custom_metrics: {}
  date: 2021-10-09_02-55-38
  done: false
  episode_len_mean: 380.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1838
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6268476406733194
          entropy_coeff: 0.009999999999999998
          kl: 0.009220730704556533
          policy_loss: -0.01877795976276199
          total_loss: -0.024541031517502334
          vf_explained_var: -0.904775857925415
          vf_loss: 2.4162745565566485e-06
    num_agent_steps_sampled: 679000
    num_agent_steps_trained: 679000
    num_steps_sampled: 679000
    num_steps_trained: 679000
  iterations_since_restore: 679
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,679,16280.3,679000,0,0,0,380.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 680000
  custom_metrics: {}
  date: 2021-10-09_02-56-01
  done: false
  episode_len_mean: 379.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1840
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5728372825516594
          entropy_coeff: 0.009999999999999998
          kl: 0.011424629562869148
          policy_loss: -0.03290194691055351
          total_loss: -0.03561540109415849
          vf_explained_var: -0.4401156008243561
          vf_loss: 1.5500377069580887e-06
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_steps_sampled: 680000
    num_steps_trained: 680000
  iterations_since_restore: 680
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,680,16303.5,680000,0,0,0,379.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 681000
  custom_metrics: {}
  date: 2021-10-09_02-56-22
  done: false
  episode_len_mean: 379.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1843
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4938524895244174
          entropy_coeff: 0.009999999999999998
          kl: 0.01171325582534777
          policy_loss: -0.06289418569455545
          total_loss: -0.06448913272470236
          vf_explained_var: -0.9841667413711548
          vf_loss: 1.4488908694450705e-06
    num_agent_steps_sampled: 681000
    num_agent_steps_trained: 681000
    num_steps_sampled: 681000
    num_steps_trained: 681000
  iterations_since_restore: 681
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,681,16324.1,681000,0,0,0,379.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 682000
  custom_metrics: {}
  date: 2021-10-09_02-56-49
  done: false
  episode_len_mean: 377.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1846
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5394068638483682
          entropy_coeff: 0.009999999999999998
          kl: 0.008596577509279908
          policy_loss: -0.06534818659226099
          total_loss: -0.07094832952651713
          vf_explained_var: -0.012361268512904644
          vf_loss: 1.887063435409194e-06
    num_agent_steps_sampled: 682000
    num_agent_steps_trained: 682000
    num_steps_sampled: 682000
    num_steps_trained: 682000
  iterations_since_restore: 682
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,682,16351.7,682000,0,0,0,377.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 683000
  custom_metrics: {}
  date: 2021-10-09_02-57-11
  done: false
  episode_len_mean: 376.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1849
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2470900277296701
          entropy_coeff: 0.009999999999999998
          kl: 0.008470273412282165
          policy_loss: -0.06483283295399613
          total_loss: -0.06765404525730345
          vf_explained_var: -0.6983962655067444
          vf_loss: 1.5178661802413748e-06
    num_agent_steps_sampled: 683000
    num_agent_steps_trained: 683000
    num_steps_sampled: 683000
    num_steps_trained: 683000
  iterations_since_restore: 683
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,683,16373.1,683000,0,0,0,376.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 684000
  custom_metrics: {}
  date: 2021-10-09_02-57-37
  done: false
  episode_len_mean: 373.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1852
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.0372170901960798
          entropy_coeff: 0.009999999999999998
          kl: 0.009311200603974928
          policy_loss: -0.012021993597348532
          total_loss: -0.011438185804420048
          vf_explained_var: -0.8171742558479309
          vf_loss: 0.00034993884097313336
    num_agent_steps_sampled: 684000
    num_agent_steps_trained: 684000
    num_steps_sampled: 684000
    num_steps_trained: 684000
  iterations_since_restore: 684
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,684,16399.8,684000,0,0,0,373.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 685000
  custom_metrics: {}
  date: 2021-10-09_02-58-08
  done: false
  episode_len_mean: 367.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1856
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.1019247757063972
          entropy_coeff: 0.009999999999999998
          kl: 0.007875955010377429
          policy_loss: -0.022823840917812453
          total_loss: -0.024870047780374684
          vf_explained_var: 0.2911466956138611
          vf_loss: 1.8343111599329455e-06
    num_agent_steps_sampled: 685000
    num_agent_steps_trained: 685000
    num_steps_sampled: 685000
    num_steps_trained: 685000
  iterations_since_restore: 685
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,685,16430.4,685000,0,0,0,367.93




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 686000
  custom_metrics: {}
  date: 2021-10-09_02-58-52
  done: false
  episode_len_mean: 364.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1860
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5558589975039163
          entropy_coeff: 0.009999999999999998
          kl: 0.010818937125512502
          policy_loss: -0.060283835646178986
          total_loss: -0.06351736129985916
          vf_explained_var: -0.20184598863124847
          vf_loss: 1.6181281908858283e-06
    num_agent_steps_sampled: 686000
    num_agent_steps_trained: 686000
    num_steps_sampled: 686000
    num_steps_trained: 686000
  iterations_since_restore: 686
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,686,16474.8,686000,0,0,0,364.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 687000
  custom_metrics: {}
  date: 2021-10-09_02-59-21
  done: false
  episode_len_mean: 360.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1863
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7949947396914163
          entropy_coeff: 0.009999999999999998
          kl: 0.00797497603497877
          policy_loss: -0.04793541667362054
          total_loss: -0.056800257311099105
          vf_explained_var: -0.522867739200592
          vf_loss: 1.1104045856629657e-06
    num_agent_steps_sampled: 687000
    num_agent_steps_trained: 687000
    num_steps_sampled: 687000
    num_steps_trained: 687000
  iterations_since_restore: 687
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,687,16503.7,687000,0,0,0,360.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 688000
  custom_metrics: {}
  date: 2021-10-09_02-59-50
  done: false
  episode_len_mean: 358.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1867
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6718229916360643
          entropy_coeff: 0.009999999999999998
          kl: 0.007740079093568082
          policy_loss: -0.0729910274864071
          total_loss: -0.08089215316706233
          vf_explained_var: -1.0
          vf_loss: 6.710963068220129e-07
    num_agent_steps_sampled: 688000
    num_agent_steps_trained: 688000
    num_steps_sampled: 688000
    num_steps_trained: 688000
  iterations_since_restore: 688
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,688,16532.5,688000,0,0,0,358.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 689000
  custom_metrics: {}
  date: 2021-10-09_03-00-15
  done: false
  episode_len_mean: 356.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1869
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5100864304436579
          entropy_coeff: 0.009999999999999998
          kl: 0.0038303656366308935
          policy_loss: 0.011392236811419328
          total_loss: 0.0006551292052285538
          vf_explained_var: -1.0
          vf_loss: 7.302926661623132e-07
    num_agent_steps_sampled: 689000
    num_agent_steps_trained: 689000
    num_steps_sampled: 689000
    num_steps_trained: 689000
  iterations_since_restore: 689
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,689,16556.9,689000,0,0,0,356.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 690000
  custom_metrics: {}
  date: 2021-10-09_03-00-41
  done: false
  episode_len_mean: 355.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1873
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5961341712209913
          entropy_coeff: 0.009999999999999998
          kl: 0.01457760945762969
          policy_loss: -0.08134656471924649
          total_loss: -0.08900451606346502
          vf_explained_var: -0.6836131811141968
          vf_loss: 9.864863288713523e-07
    num_agent_steps_sampled: 690000
    num_agent_steps_trained: 690000
    num_steps_sampled: 690000
    num_steps_trained: 690000
  iterations_since_restore: 690
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,690,16583,690000,0,0,0,355.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 691000
  custom_metrics: {}
  date: 2021-10-09_03-01-06
  done: false
  episode_len_mean: 352.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1876
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5559661666552225
          entropy_coeff: 0.009999999999999998
          kl: 0.011179232929965465
          policy_loss: -0.05744197656297022
          total_loss: -0.06663400039283765
          vf_explained_var: -0.9737604856491089
          vf_loss: 7.141049773432921e-07
    num_agent_steps_sampled: 691000
    num_agent_steps_trained: 691000
    num_steps_sampled: 691000
    num_steps_trained: 691000
  iterations_since_restore: 691
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,691,16608.5,691000,0,0,0,352.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 692000
  custom_metrics: {}
  date: 2021-10-09_03-01-35
  done: false
  episode_len_mean: 350.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1879
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4204019784927369
          entropy_coeff: 0.009999999999999998
          kl: 0.014294614320897263
          policy_loss: -0.07101674342734945
          total_loss: -0.07707894189904134
          vf_explained_var: -0.2823769450187683
          vf_loss: 5.923998549582798e-07
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_steps_sampled: 692000
    num_steps_trained: 692000
  iterations_since_restore: 692
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,692,16637.3,692000,0,0,0,350.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 693000
  custom_metrics: {}
  date: 2021-10-09_03-02-04
  done: false
  episode_len_mean: 345.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1883
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.55235669347975
          entropy_coeff: 0.009999999999999998
          kl: 0.015280167569577053
          policy_loss: -0.10903915853963958
          total_loss: -0.11585959734188186
          vf_explained_var: -0.5646441578865051
          vf_loss: 5.962745512356883e-07
    num_agent_steps_sampled: 693000
    num_agent_steps_trained: 693000
    num_steps_sampled: 693000
    num_steps_trained: 693000
  iterations_since_restore: 693
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,693,16666.6,693000,0,0,0,345.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 694000
  custom_metrics: {}
  date: 2021-10-09_03-02-32
  done: false
  episode_len_mean: 342.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1886
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5023436254925198
          entropy_coeff: 0.009999999999999998
          kl: 0.013431123829387895
          policy_loss: -0.029381926730275156
          total_loss: -0.036755386864145596
          vf_explained_var: -0.46884214878082275
          vf_loss: 5.324852316511573e-07
    num_agent_steps_sampled: 694000
    num_agent_steps_trained: 694000
    num_steps_sampled: 694000
    num_steps_trained: 694000
  iterations_since_restore: 694
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,694,16694.4,694000,0,0,0,342.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 695000
  custom_metrics: {}
  date: 2021-10-09_03-02-57
  done: false
  episode_len_mean: 342.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1889
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.547072364224328
          entropy_coeff: 0.009999999999999998
          kl: 0.014950953217711888
          policy_loss: -0.07290834714141158
          total_loss: -0.07986322380602359
          vf_explained_var: -0.8488155007362366
          vf_loss: 8.117540106569423e-07
    num_agent_steps_sampled: 695000
    num_agent_steps_trained: 695000
    num_steps_sampled: 695000
    num_steps_trained: 695000
  iterations_since_restore: 695
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,695,16718.9,695000,0,0,0,342.6




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 696000
  custom_metrics: {}
  date: 2021-10-09_03-03-37
  done: false
  episode_len_mean: 341.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1891
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5943749838405186
          entropy_coeff: 0.009999999999999998
          kl: 0.01390337964903942
          policy_loss: -0.058146168912450474
          total_loss: -0.0661706898568405
          vf_explained_var: -0.707194447517395
          vf_loss: 8.195450372265138e-07
    num_agent_steps_sampled: 696000
    num_agent_steps_trained: 696000
    num_steps_sampled: 696000
    num_steps_trained: 696000
  iterations_since_restore: 696
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,696,16759.2,696000,0,0,0,341.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 697000
  custom_metrics: {}
  date: 2021-10-09_03-04-03
  done: false
  episode_len_mean: 341.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1895
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4710113551881578
          entropy_coeff: 0.009999999999999998
          kl: 0.013582648271940003
          policy_loss: -0.035844625677499506
          total_loss: -0.04281816519796848
          vf_explained_var: 0.1738884598016739
          vf_loss: 8.305416801906378e-07
    num_agent_steps_sampled: 697000
    num_agent_steps_trained: 697000
    num_steps_sampled: 697000
    num_steps_trained: 697000
  iterations_since_restore: 697
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,697,16785.1,697000,0,0,0,341.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 698000
  custom_metrics: {}
  date: 2021-10-09_03-04-28
  done: false
  episode_len_mean: 341.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1898
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4582350042131211
          entropy_coeff: 0.009999999999999998
          kl: 0.013492933752198077
          policy_loss: -0.03082929402589798
          total_loss: -0.03772602387600475
          vf_explained_var: -0.2324323207139969
          vf_loss: 9.756439699989036e-07
    num_agent_steps_sampled: 698000
    num_agent_steps_trained: 698000
    num_steps_sampled: 698000
    num_steps_trained: 698000
  iterations_since_restore: 698
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,698,16809.8,698000,0,0,0,341.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 699000
  custom_metrics: {}
  date: 2021-10-09_03-04-49
  done: false
  episode_len_mean: 342.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1900
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4123219503296747
          entropy_coeff: 0.009999999999999998
          kl: 0.01407665286604944
          policy_loss: -0.020996263954374525
          total_loss: -0.027101295275820628
          vf_explained_var: -0.642211377620697
          vf_loss: 1.0932677690157612e-06
    num_agent_steps_sampled: 699000
    num_agent_steps_trained: 699000
    num_steps_sampled: 699000
    num_steps_trained: 699000
  iterations_since_restore: 699
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,699,16831.4,699000,0,0,0,342.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 700000
  custom_metrics: {}
  date: 2021-10-09_03-05-17
  done: false
  episode_len_mean: 337.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1904
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5271817604700724
          entropy_coeff: 0.009999999999999998
          kl: 0.01603389278468749
          policy_loss: -0.06953361738059256
          total_loss: -0.07567275576293468
          vf_explained_var: -0.5173407196998596
          vf_loss: 8.766249973430401e-07
    num_agent_steps_sampled: 700000
    num_agent_steps_trained: 700000
    num_steps_sampled: 700000
    num_steps_trained: 700000
  iterations_since_restore: 700
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,700,16859,700000,0,0,0,337.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 701000
  custom_metrics: {}
  date: 2021-10-09_03-05-42
  done: false
  episode_len_mean: 335.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1907
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4056682997279697
          entropy_coeff: 0.009999999999999998
          kl: 0.022520431219865442
          policy_loss: -0.03752084755235248
          total_loss: -0.038749840317500964
          vf_explained_var: 0.25164729356765747
          vf_loss: 1.6010148047800815e-06
    num_agent_steps_sampled: 701000
    num_agent_steps_trained: 701000
    num_steps_sampled: 701000
    num_steps_trained: 701000
  iterations_since_restore: 701
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,701,16884.3,701000,0,0,0,335.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 702000
  custom_metrics: {}
  date: 2021-10-09_03-06-13
  done: false
  episode_len_mean: 332.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1910
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.3873362607426114
          entropy_coeff: 0.009999999999999998
          kl: 0.01159695338830065
          policy_loss: -0.0776002640939421
          total_loss: -0.08156199701544312
          vf_explained_var: -0.9708914756774902
          vf_loss: 4.388560425796742e-06
    num_agent_steps_sampled: 702000
    num_agent_steps_trained: 702000
    num_steps_sampled: 702000
    num_steps_trained: 702000
  iterations_since_restore: 702
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,702,16915.6,702000,0,0,0,332.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 703000
  custom_metrics: {}
  date: 2021-10-09_03-06-39
  done: false
  episode_len_mean: 327.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1913
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6325573444366455
          entropy_coeff: 0.009999999999999998
          kl: 0.014457308748730075
          policy_loss: -0.08653720186816322
          total_loss: -0.09050286541589432
          vf_explained_var: -0.7983362674713135
          vf_loss: 9.079236881209202e-06
    num_agent_steps_sampled: 703000
    num_agent_steps_trained: 703000
    num_steps_sampled: 703000
    num_steps_trained: 703000
  iterations_since_restore: 703
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,703,16941.2,703000,0,0,0,327.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 704000
  custom_metrics: {}
  date: 2021-10-09_03-07-04
  done: false
  episode_len_mean: 327.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1917
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4169264992078145
          entropy_coeff: 0.009999999999999998
          kl: 0.013293108078418134
          policy_loss: -0.055707990440229574
          total_loss: -0.058519453348384966
          vf_explained_var: -0.2664356529712677
          vf_loss: 1.543632383042374e-06
    num_agent_steps_sampled: 704000
    num_agent_steps_trained: 704000
    num_steps_sampled: 704000
    num_steps_trained: 704000
  iterations_since_restore: 704
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,704,16966.4,704000,0,0,0,327.32




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 705000
  custom_metrics: {}
  date: 2021-10-09_03-07-48
  done: false
  episode_len_mean: 325.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1920
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6066765745480855
          entropy_coeff: 0.009999999999999998
          kl: 0.007344735250607452
          policy_loss: -0.06590999642180072
          total_loss: -0.07570148015187847
          vf_explained_var: -0.2816881537437439
          vf_loss: 6.984718908142895e-07
    num_agent_steps_sampled: 705000
    num_agent_steps_trained: 705000
    num_steps_sampled: 705000
    num_steps_trained: 705000
  iterations_since_restore: 705
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,705,17009.7,705000,0,0,0,325.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 706000
  custom_metrics: {}
  date: 2021-10-09_03-08-16
  done: false
  episode_len_mean: 323.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1923
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.383962673611111
          entropy_coeff: 0.009999999999999998
          kl: 0.009242241225632478
          policy_loss: -0.06343407956883311
          total_loss: -0.06937499017351204
          vf_explained_var: -0.3296220600605011
          vf_loss: 3.0986212689438415e-06
    num_agent_steps_sampled: 706000
    num_agent_steps_trained: 706000
    num_steps_sampled: 706000
    num_steps_trained: 706000
  iterations_since_restore: 706
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,706,17038.3,706000,0,0,0,323.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 707000
  custom_metrics: {}
  date: 2021-10-09_03-08-43
  done: false
  episode_len_mean: 321.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1926
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4835408687591554
          entropy_coeff: 0.009999999999999998
          kl: 0.009699913140202436
          policy_loss: -0.08872695602476596
          total_loss: -0.0952749780482716
          vf_explained_var: 0.09225862473249435
          vf_loss: 7.81417628875008e-07
    num_agent_steps_sampled: 707000
    num_agent_steps_trained: 707000
    num_steps_sampled: 707000
    num_steps_trained: 707000
  iterations_since_restore: 707
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,707,17065,707000,0,0,0,321.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 708000
  custom_metrics: {}
  date: 2021-10-09_03-09-09
  done: false
  episode_len_mean: 320.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1930
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4311512589454651
          entropy_coeff: 0.009999999999999998
          kl: 0.013748451399889068
          policy_loss: -0.06011425589935647
          total_loss: -0.06267306618392468
          vf_explained_var: -0.7104936242103577
          vf_loss: 7.444587285792497e-06
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_steps_sampled: 708000
    num_steps_trained: 708000
  iterations_since_restore: 708
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,708,17091.3,708000,0,0,0,320.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 709000
  custom_metrics: {}
  date: 2021-10-09_03-09-33
  done: false
  episode_len_mean: 320.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1933
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4594067679511176
          entropy_coeff: 0.009999999999999998
          kl: 0.012222824743349273
          policy_loss: -0.060813744014336
          total_loss: -0.0649646203344067
          vf_explained_var: -0.5446736812591553
          vf_loss: 1.273267641484684e-06
    num_agent_steps_sampled: 709000
    num_agent_steps_trained: 709000
    num_steps_sampled: 709000
    num_steps_trained: 709000
  iterations_since_restore: 709
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,709,17115.2,709000,0,0,0,320.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 710000
  custom_metrics: {}
  date: 2021-10-09_03-09-57
  done: false
  episode_len_mean: 319.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1935
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4969821015993754
          entropy_coeff: 0.009999999999999998
          kl: 0.008275078306136842
          policy_loss: -0.051259544781512685
          total_loss: -0.05915482648544841
          vf_explained_var: -0.8331475257873535
          vf_loss: 5.165311191351268e-06
    num_agent_steps_sampled: 710000
    num_agent_steps_trained: 710000
    num_steps_sampled: 710000
    num_steps_trained: 710000
  iterations_since_restore: 710
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,710,17138.8,710000,0,0,0,319.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 711000
  custom_metrics: {}
  date: 2021-10-09_03-10-24
  done: false
  episode_len_mean: 315.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1939
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5512034310234917
          entropy_coeff: 0.009999999999999998
          kl: 0.009101366414327848
          policy_loss: -0.0955436228464047
          total_loss: -0.10312734097242356
          vf_explained_var: -0.9929533004760742
          vf_loss: 0.00015304630655161116
    num_agent_steps_sampled: 711000
    num_agent_steps_trained: 711000
    num_steps_sampled: 711000
    num_steps_trained: 711000
  iterations_since_restore: 711
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,711,17165.8,711000,0,0,0,315.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 712000
  custom_metrics: {}
  date: 2021-10-09_03-10-52
  done: false
  episode_len_mean: 314.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1942
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4858536097738477
          entropy_coeff: 0.009999999999999998
          kl: 0.014670901638987387
          policy_loss: -0.032409358790351284
          total_loss: -0.03466046940949228
          vf_explained_var: -0.5646260380744934
          vf_loss: 7.411921553486738e-05
    num_agent_steps_sampled: 712000
    num_agent_steps_trained: 712000
    num_steps_sampled: 712000
    num_steps_trained: 712000
  iterations_since_restore: 712
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,712,17194.1,712000,0,0,0,314.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 713000
  custom_metrics: {}
  date: 2021-10-09_03-11-16
  done: false
  episode_len_mean: 312.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1945
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6376646863089668
          entropy_coeff: 0.009999999999999998
          kl: 0.012954973003878222
          policy_loss: -0.09303082707855437
          total_loss: -0.09833795204758644
          vf_explained_var: -0.11930091679096222
          vf_loss: 2.129602627418434e-06
    num_agent_steps_sampled: 713000
    num_agent_steps_trained: 713000
    num_steps_sampled: 713000
    num_steps_trained: 713000
  iterations_since_restore: 713
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,713,17218.1,713000,0,0,0,312.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 714000
  custom_metrics: {}
  date: 2021-10-09_03-11-40
  done: false
  episode_len_mean: 313.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1948
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.485998797416687
          entropy_coeff: 0.009999999999999998
          kl: 0.01151831214652156
          policy_loss: -0.10555417728092935
          total_loss: -0.11057249469061693
          vf_explained_var: 0.3189179599285126
          vf_loss: 1.6126997309129365e-06
    num_agent_steps_sampled: 714000
    num_agent_steps_trained: 714000
    num_steps_sampled: 714000
    num_steps_trained: 714000
  iterations_since_restore: 714
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,714,17241.9,714000,0,0,0,313.69




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 715000
  custom_metrics: {}
  date: 2021-10-09_03-12-24
  done: false
  episode_len_mean: 313.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1951
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6208865920702615
          entropy_coeff: 0.009999999999999998
          kl: 0.010826286549495473
          policy_loss: -0.013537829886707995
          total_loss: -0.02049673141704665
          vf_explained_var: -0.5451164841651917
          vf_loss: 1.0995371376844155e-06
    num_agent_steps_sampled: 715000
    num_agent_steps_trained: 715000
    num_steps_sampled: 715000
    num_steps_trained: 715000
  iterations_since_restore: 715
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,715,17285.7,715000,0,0,0,313.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 716000
  custom_metrics: {}
  date: 2021-10-09_03-12-50
  done: false
  episode_len_mean: 314.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1954
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5445375892851088
          entropy_coeff: 0.009999999999999998
          kl: 0.014323911705625076
          policy_loss: -0.07104891972202394
          total_loss: -0.07425602519263824
          vf_explained_var: 0.316494882106781
          vf_loss: 1.398415092050224e-06
    num_agent_steps_sampled: 716000
    num_agent_steps_trained: 716000
    num_steps_sampled: 716000
    num_steps_trained: 716000
  iterations_since_restore: 716
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,716,17311.8,716000,0,0,0,314.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 717000
  custom_metrics: {}
  date: 2021-10-09_03-13-20
  done: false
  episode_len_mean: 314.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1958
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.453392297691769
          entropy_coeff: 0.009999999999999998
          kl: 0.009724297572868008
          policy_loss: -0.04891327689919207
          total_loss: -0.05513889048662451
          vf_explained_var: -0.1645144820213318
          vf_loss: 8.725831656243422e-07
    num_agent_steps_sampled: 717000
    num_agent_steps_trained: 717000
    num_steps_sampled: 717000
    num_steps_trained: 717000
  iterations_since_restore: 717
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,717,17341.5,717000,0,0,0,314.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 718000
  custom_metrics: {}
  date: 2021-10-09_03-13-50
  done: false
  episode_len_mean: 314.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1962
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.733722178141276
          entropy_coeff: 0.009999999999999998
          kl: 0.009541216317108258
          policy_loss: -0.042617218589617145
          total_loss: -0.051802884725232916
          vf_explained_var: -0.40030038356781006
          vf_loss: 5.264194710561141e-07
    num_agent_steps_sampled: 718000
    num_agent_steps_trained: 718000
    num_steps_sampled: 718000
    num_steps_trained: 718000
  iterations_since_restore: 718
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,718,17372,718000,0,0,0,314.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 719000
  custom_metrics: {}
  date: 2021-10-09_03-14-18
  done: false
  episode_len_mean: 313.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1965
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6306991855303445
          entropy_coeff: 0.009999999999999998
          kl: 0.012027242182161973
          policy_loss: -0.08558821727832158
          total_loss: -0.09161966658300824
          vf_explained_var: -0.240436390042305
          vf_loss: 7.084787726954447e-07
    num_agent_steps_sampled: 719000
    num_agent_steps_trained: 719000
    num_steps_sampled: 719000
    num_steps_trained: 719000
  iterations_since_restore: 719
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,719,17399.5,719000,0,0,0,313.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 720000
  custom_metrics: {}
  date: 2021-10-09_03-14-46
  done: false
  episode_len_mean: 314.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1968
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6377931276957194
          entropy_coeff: 0.009999999999999998
          kl: 0.012781112964114803
          policy_loss: -0.13507210148705376
          total_loss: -0.14053033938010534
          vf_explained_var: -0.8415254950523376
          vf_loss: 8.310336391382912e-07
    num_agent_steps_sampled: 720000
    num_agent_steps_trained: 720000
    num_steps_sampled: 720000
    num_steps_trained: 720000
  iterations_since_restore: 720
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,720,17427.4,720000,0,0,0,314.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 721000
  custom_metrics: {}
  date: 2021-10-09_03-15-16
  done: false
  episode_len_mean: 311.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1972
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7555515474743313
          entropy_coeff: 0.009999999999999998
          kl: 0.011765559435669894
          policy_loss: -0.10217483138872517
          total_loss: -0.10967866033315658
          vf_explained_var: -0.5539149641990662
          vf_loss: 4.0977467234723814e-07
    num_agent_steps_sampled: 721000
    num_agent_steps_trained: 721000
    num_steps_sampled: 721000
    num_steps_trained: 721000
  iterations_since_restore: 721
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,721,17457.5,721000,0,0,0,311.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 722000
  custom_metrics: {}
  date: 2021-10-09_03-15-45
  done: false
  episode_len_mean: 309.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1976
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.594639335738288
          entropy_coeff: 0.009999999999999998
          kl: 0.009794477423225029
          policy_loss: -0.09816063866019249
          total_loss: -0.10573928389284346
          vf_explained_var: -0.5867311358451843
          vf_loss: 3.5826155308566537e-07
    num_agent_steps_sampled: 722000
    num_agent_steps_trained: 722000
    num_steps_sampled: 722000
    num_steps_trained: 722000
  iterations_since_restore: 722
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,722,17486.7,722000,0,0,0,309.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 723000
  custom_metrics: {}
  date: 2021-10-09_03-16-14
  done: false
  episode_len_mean: 309.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1979
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.549661033683353
          entropy_coeff: 0.009999999999999998
          kl: 0.009611550201481123
          policy_loss: -0.04301507675813304
          total_loss: -0.050300135277211666
          vf_explained_var: -0.22131067514419556
          vf_loss: 4.366068045176993e-07
    num_agent_steps_sampled: 723000
    num_agent_steps_trained: 723000
    num_steps_sampled: 723000
    num_steps_trained: 723000
  iterations_since_restore: 723
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,723,17515.7,723000,0,0,0,309.95




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 724000
  custom_metrics: {}
  date: 2021-10-09_03-16-55
  done: false
  episode_len_mean: 310.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1982
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7450491375393338
          entropy_coeff: 0.009999999999999998
          kl: 0.012540449921966533
          policy_loss: -0.07129738715787728
          total_loss: -0.07803388887809383
          vf_explained_var: -1.0
          vf_loss: 7.228495099474862e-07
    num_agent_steps_sampled: 724000
    num_agent_steps_trained: 724000
    num_steps_sampled: 724000
    num_steps_trained: 724000
  iterations_since_restore: 724
  node_ip: 192.168.3.5
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,724,17556.6,724000,0,0,0,310.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 725000
  custom_metrics: {}
  date: 2021-10-09_03-17-21
  done: false
  episode_len_mean: 311.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1985
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5848642230033874
          entropy_coeff: 0.009999999999999998
          kl: 0.00973919172093171
          policy_loss: -0.05597200420581632
          total_loss: -0.06349992423007886
          vf_explained_var: -0.35739409923553467
          vf_loss: 5.590051361852804e-07
    num_agent_steps_sampled: 725000
    num_agent_steps_trained: 725000
    num_steps_sampled: 725000
    num_steps_trained: 725000
  iterations_since_restore: 725
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,725,17582.1,725000,0,0,0,311.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 726000
  custom_metrics: {}
  date: 2021-10-09_03-17-43
  done: false
  episode_len_mean: 312.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1988
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6512149492899577
          entropy_coeff: 0.009999999999999998
          kl: 0.008925401981109183
          policy_loss: -0.04939284266697036
          total_loss: -0.05827925726771355
          vf_explained_var: -0.8705670833587646
          vf_loss: 7.928832976252427e-07
    num_agent_steps_sampled: 726000
    num_agent_steps_trained: 726000
    num_steps_sampled: 726000
    num_steps_trained: 726000
  iterations_since_restore: 726
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,726,17604.9,726000,0,0,0,312.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 727000
  custom_metrics: {}
  date: 2021-10-09_03-18-08
  done: false
  episode_len_mean: 312.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1991
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4725940585136414
          entropy_coeff: 0.009999999999999998
          kl: 0.009092104175382568
          policy_loss: -0.05209058444533083
          total_loss: -0.059048681747582224
          vf_explained_var: -0.9998607039451599
          vf_loss: 4.866699776408091e-07
    num_agent_steps_sampled: 727000
    num_agent_steps_trained: 727000
    num_steps_sampled: 727000
    num_steps_trained: 727000
  iterations_since_restore: 727
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,727,17629.5,727000,0,0,0,312.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 728000
  custom_metrics: {}
  date: 2021-10-09_03-18-31
  done: false
  episode_len_mean: 311.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1994
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6068017721176147
          entropy_coeff: 0.009999999999999998
          kl: 0.011380854317234382
          policy_loss: -0.025666266100274192
          total_loss: -0.032011016334096594
          vf_explained_var: -0.6478455662727356
          vf_loss: 6.386694546260212e-07
    num_agent_steps_sampled: 728000
    num_agent_steps_trained: 728000
    num_steps_sampled: 728000
    num_steps_trained: 728000
  iterations_since_restore: 728
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,728,17653,728000,0,0,0,311.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 729000
  custom_metrics: {}
  date: 2021-10-09_03-18-57
  done: false
  episode_len_mean: 311.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1997
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6996303174230787
          entropy_coeff: 0.009999999999999998
          kl: 0.012666019380074861
          policy_loss: -0.07261070313139094
          total_loss: -0.07878566061456999
          vf_explained_var: 0.010960999876260757
          vf_loss: 8.053025999919959e-07
    num_agent_steps_sampled: 729000
    num_agent_steps_trained: 729000
    num_steps_sampled: 729000
    num_steps_trained: 729000
  iterations_since_restore: 729
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,729,17678.7,729000,0,0,0,311.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 730000
  custom_metrics: {}
  date: 2021-10-09_03-19-25
  done: false
  episode_len_mean: 310.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2000
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6479937473932902
          entropy_coeff: 0.009999999999999998
          kl: 0.01231297772258715
          policy_loss: -0.07430308345291349
          total_loss: -0.08023445382714271
          vf_explained_var: -1.0
          vf_loss: 2.9628605862007538e-05
    num_agent_steps_sampled: 730000
    num_agent_steps_trained: 730000
    num_steps_sampled: 730000
    num_steps_trained: 730000
  iterations_since_restore: 730
  node_ip: 192.168.3.5
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,730,17706.5,730000,0,0,0,310.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 731000
  custom_metrics: {}
  date: 2021-10-09_03-19-50
  done: false
  episode_len_mean: 309.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2004
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4873791019121805
          entropy_coeff: 0.009999999999999998
          kl: 0.0083429151248762
          policy_loss: -0.09865475706756115
          total_loss: -0.10639315119220151
          vf_explained_var: 0.15783223509788513
          vf_loss: 8.071250037270754e-06
    num_agent_steps_sampled: 731000
    num_agent_steps_trained: 731000
    num_steps_sampled: 731000
    num_steps_trained: 731000
  iterations_since_restore: 731
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,731,17731.6,731000,0,0,0,309.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 732000
  custom_metrics: {}
  date: 2021-10-09_03-20-14
  done: false
  episode_len_mean: 310.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2007
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6029941240946453
          entropy_coeff: 0.009999999999999998
          kl: 0.009344744493844499
          policy_loss: 0.04168776431017452
          total_loss: 0.03379507453905212
          vf_explained_var: -0.7581789493560791
          vf_loss: 0.0001540654697237187
    num_agent_steps_sampled: 732000
    num_agent_steps_trained: 732000
    num_steps_sampled: 732000
    num_steps_trained: 732000
  iterations_since_restore: 732
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,732,17755.9,732000,0,0,0,310.77




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 733000
  custom_metrics: {}
  date: 2021-10-09_03-20-55
  done: false
  episode_len_mean: 312.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2010
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.530601147810618
          entropy_coeff: 0.009999999999999998
          kl: 0.015089699762667802
          policy_loss: -0.014097196298340957
          total_loss: -0.016506548039615154
          vf_explained_var: 0.0036430193576961756
          vf_loss: 5.577313185590861e-06
    num_agent_steps_sampled: 733000
    num_agent_steps_trained: 733000
    num_steps_sampled: 733000
    num_steps_trained: 733000
  iterations_since_restore: 733
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,733,17796.4,733000,0,0,0,312.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 734000
  custom_metrics: {}
  date: 2021-10-09_03-21-24
  done: false
  episode_len_mean: 312.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2013
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.734325737423367
          entropy_coeff: 0.009999999999999998
          kl: 0.010085437789608735
          policy_loss: -0.09455342673593098
          total_loss: -0.10327845149570042
          vf_explained_var: -0.011898611672222614
          vf_loss: 2.273853577763576e-06
    num_agent_steps_sampled: 734000
    num_agent_steps_trained: 734000
    num_steps_sampled: 734000
    num_steps_trained: 734000
  iterations_since_restore: 734
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,734,17825.1,734000,0,0,0,312.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 735000
  custom_metrics: {}
  date: 2021-10-09_03-21-50
  done: false
  episode_len_mean: 311.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2016
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.824486373530494
          entropy_coeff: 0.009999999999999998
          kl: 0.012174584218252566
          policy_loss: -0.10445859361853864
          total_loss: -0.11230179833041297
          vf_explained_var: -0.11891327053308487
          vf_loss: 9.511393134036754e-07
    num_agent_steps_sampled: 735000
    num_agent_steps_trained: 735000
    num_steps_sampled: 735000
    num_steps_trained: 735000
  iterations_since_restore: 735
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,735,17851.4,735000,0,0,0,311.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 736000
  custom_metrics: {}
  date: 2021-10-09_03-22-13
  done: false
  episode_len_mean: 313.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2019
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.563333645131853
          entropy_coeff: 0.009999999999999998
          kl: 0.012997542167953036
          policy_loss: -0.10170912746753957
          total_loss: -0.10623689645694362
          vf_explained_var: 0.22227564454078674
          vf_loss: 1.8081189791827355e-06
    num_agent_steps_sampled: 736000
    num_agent_steps_trained: 736000
    num_steps_sampled: 736000
    num_steps_trained: 736000
  iterations_since_restore: 736
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,736,17874.7,736000,0,0,0,313.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 737000
  custom_metrics: {}
  date: 2021-10-09_03-22-40
  done: false
  episode_len_mean: 313.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2022
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.832204814751943
          entropy_coeff: 0.009999999999999998
          kl: 0.014101792475553904
          policy_loss: -0.042924921007619965
          total_loss: -0.049199058984716736
          vf_explained_var: 0.018423954024910927
          vf_loss: 7.92579981773795e-07
    num_agent_steps_sampled: 737000
    num_agent_steps_trained: 737000
    num_steps_sampled: 737000
    num_steps_trained: 737000
  iterations_since_restore: 737
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,737,17901.4,737000,0,0,0,313.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 738000
  custom_metrics: {}
  date: 2021-10-09_03-23-05
  done: false
  episode_len_mean: 314.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2025
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7530284100108677
          entropy_coeff: 0.009999999999999998
          kl: 0.010862070226251082
          policy_loss: -0.07467944398522378
          total_loss: -0.08292952838043371
          vf_explained_var: -0.2577785849571228
          vf_loss: 7.687730844610188e-07
    num_agent_steps_sampled: 738000
    num_agent_steps_trained: 738000
    num_steps_sampled: 738000
    num_steps_trained: 738000
  iterations_since_restore: 738
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,738,17926.2,738000,0,0,0,314.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 739000
  custom_metrics: {}
  date: 2021-10-09_03-23-28
  done: false
  episode_len_mean: 316.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2028
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.786910178926256
          entropy_coeff: 0.009999999999999998
          kl: 0.009371481908844122
          policy_loss: -0.08998204277207454
          total_loss: -0.0998439417531093
          vf_explained_var: -0.07867691665887833
          vf_loss: 1.1775606705239422e-06
    num_agent_steps_sampled: 739000
    num_agent_steps_trained: 739000
    num_steps_sampled: 739000
    num_steps_trained: 739000
  iterations_since_restore: 739
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,739,17949.5,739000,0,0,0,316.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 740000
  custom_metrics: {}
  date: 2021-10-09_03-23-53
  done: false
  episode_len_mean: 317.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2031
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.76658140818278
          entropy_coeff: 0.009999999999999998
          kl: 0.01009091208474921
          policy_loss: -0.10030310977664258
          total_loss: -0.10933905562592877
          vf_explained_var: -0.8068065047264099
          vf_loss: 9.236630183472193e-06
    num_agent_steps_sampled: 740000
    num_agent_steps_trained: 740000
    num_steps_sampled: 740000
    num_steps_trained: 740000
  iterations_since_restore: 740
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,740,17974.5,740000,0,0,0,317.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 741000
  custom_metrics: {}
  date: 2021-10-09_03-24-19
  done: false
  episode_len_mean: 315.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2034
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8364058627022637
          entropy_coeff: 0.009999999999999998
          kl: 0.011685437604648522
          policy_loss: -0.04056745105319553
          total_loss: -0.04894569859736496
          vf_explained_var: 0.07166561484336853
          vf_loss: 2.9772331888327445e-06
    num_agent_steps_sampled: 741000
    num_agent_steps_trained: 741000
    num_steps_sampled: 741000
    num_steps_trained: 741000
  iterations_since_restore: 741
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,741,18000.1,741000,0,0,0,315.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 742000
  custom_metrics: {}
  date: 2021-10-09_03-24-45
  done: false
  episode_len_mean: 315.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2037
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7900562829441495
          entropy_coeff: 0.009999999999999998
          kl: 0.011390823566779515
          policy_loss: -0.02906895929740535
          total_loss: -0.03723715667923291
          vf_explained_var: 0.09101387858390808
          vf_loss: 1.2206393910850642e-06
    num_agent_steps_sampled: 742000
    num_agent_steps_trained: 742000
    num_steps_sampled: 742000
    num_steps_trained: 742000
  iterations_since_restore: 742
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,742,18026.3,742000,0,0,0,315.71




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 743000
  custom_metrics: {}
  date: 2021-10-09_03-25-29
  done: false
  episode_len_mean: 315.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2040
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.783136092291938
          entropy_coeff: 0.009999999999999998
          kl: 0.010435201001493036
          policy_loss: -0.06376681878334946
          total_loss: -0.07268065065145493
          vf_explained_var: -0.4024255573749542
          vf_loss: 2.7702080068871105e-06
    num_agent_steps_sampled: 743000
    num_agent_steps_trained: 743000
    num_steps_sampled: 743000
    num_steps_trained: 743000
  iterations_since_restore: 743
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,743,18070.2,743000,0,0,0,315.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 744000
  custom_metrics: {}
  date: 2021-10-09_03-25-57
  done: false
  episode_len_mean: 316.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2043
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7907820489671495
          entropy_coeff: 0.009999999999999998
          kl: 0.015640148344929446
          policy_loss: -0.17754789942668545
          total_loss: -0.18209349984923998
          vf_explained_var: -0.2379390299320221
          vf_loss: 8.911794610513526e-07
    num_agent_steps_sampled: 744000
    num_agent_steps_trained: 744000
    num_steps_sampled: 744000
    num_steps_trained: 744000
  iterations_since_restore: 744
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,744,18098.3,744000,0,0,0,316.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 745000
  custom_metrics: {}
  date: 2021-10-09_03-26-23
  done: false
  episode_len_mean: 316.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2046
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.900769707891676
          entropy_coeff: 0.009999999999999998
          kl: 0.0098570208602524
          policy_loss: -0.14780182507303025
          total_loss: -0.15838785486088858
          vf_explained_var: -0.30203038454055786
          vf_loss: 8.447463493944775e-07
    num_agent_steps_sampled: 745000
    num_agent_steps_trained: 745000
    num_steps_sampled: 745000
    num_steps_trained: 745000
  iterations_since_restore: 745
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,745,18123.9,745000,0,0,0,316.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 746000
  custom_metrics: {}
  date: 2021-10-09_03-26-48
  done: false
  episode_len_mean: 315.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2050
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7658071080843607
          entropy_coeff: 0.009999999999999998
          kl: 0.008337717559116796
          policy_loss: -0.027472754785170157
          total_loss: -0.037977813929319384
          vf_explained_var: -0.9773668050765991
          vf_loss: 3.0127527000761096e-05
    num_agent_steps_sampled: 746000
    num_agent_steps_trained: 746000
    num_steps_sampled: 746000
    num_steps_trained: 746000
  iterations_since_restore: 746
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,746,18149.5,746000,0,0,0,315.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 747000
  custom_metrics: {}
  date: 2021-10-09_03-27-15
  done: false
  episode_len_mean: 314.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2053
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.959841521581014
          entropy_coeff: 0.009999999999999998
          kl: 0.010082585788832314
          policy_loss: -0.01852998174726963
          total_loss: -0.029500016756355763
          vf_explained_var: -0.724087119102478
          vf_loss: 1.4858689900797293e-05
    num_agent_steps_sampled: 747000
    num_agent_steps_trained: 747000
    num_steps_sampled: 747000
    num_steps_trained: 747000
  iterations_since_restore: 747
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,747,18176.3,747000,0,0,0,314.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 748000
  custom_metrics: {}
  date: 2021-10-09_03-27-39
  done: false
  episode_len_mean: 315.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2056
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5978598011864555
          entropy_coeff: 0.009999999999999998
          kl: 0.012699378830645296
          policy_loss: -0.08000874167515172
          total_loss: -0.08513682712283399
          vf_explained_var: -0.04613958299160004
          vf_loss: 1.4744314057679731e-06
    num_agent_steps_sampled: 748000
    num_agent_steps_trained: 748000
    num_steps_sampled: 748000
    num_steps_trained: 748000
  iterations_since_restore: 748
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,748,18200.2,748000,0,0,0,315.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 749000
  custom_metrics: {}
  date: 2021-10-09_03-28-00
  done: false
  episode_len_mean: 318.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2058
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.3634243720107608
          entropy_coeff: 0.009999999999999998
          kl: 0.008920881125308247
          policy_loss: -0.08085431071619192
          total_loss: -0.08662165519264009
          vf_explained_var: -0.8853527307510376
          vf_loss: 0.0002458183430399509
    num_agent_steps_sampled: 749000
    num_agent_steps_trained: 749000
    num_steps_sampled: 749000
    num_steps_trained: 749000
  iterations_since_restore: 749
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,749,18221,749000,0,0,0,318.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 750000
  custom_metrics: {}
  date: 2021-10-09_03-28-28
  done: false
  episode_len_mean: 320.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2062
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6918961352772184
          entropy_coeff: 0.009999999999999998
          kl: 0.009096748368844684
          policy_loss: -0.014290898044904073
          total_loss: -0.02343608567284213
          vf_explained_var: 0.04988275468349457
          vf_loss: 2.4501445472601013e-06
    num_agent_steps_sampled: 750000
    num_agent_steps_trained: 750000
    num_steps_sampled: 750000
    num_steps_trained: 750000
  iterations_since_restore: 750
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,750,18248.8,750000,0,0,0,320.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 751000
  custom_metrics: {}
  date: 2021-10-09_03-28-51
  done: false
  episode_len_mean: 322.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2064
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6661570919884576
          entropy_coeff: 0.009999999999999998
          kl: 0.012009894004319458
          policy_loss: -0.1317499182290501
          total_loss: -0.1381498488287131
          vf_explained_var: 0.19929920136928558
          vf_loss: 1.6249869190687604e-06
    num_agent_steps_sampled: 751000
    num_agent_steps_trained: 751000
    num_steps_sampled: 751000
    num_steps_trained: 751000
  iterations_since_restore: 751
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,751,18272,751000,0,0,0,322.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 752000
  custom_metrics: {}
  date: 2021-10-09_03-29-17
  done: false
  episode_len_mean: 322.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2067
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8654538949330648
          entropy_coeff: 0.009999999999999998
          kl: 0.010610116767050492
          policy_loss: -0.11375984590914515
          total_loss: -0.12334952300621403
          vf_explained_var: 0.1469871997833252
          vf_loss: 6.727386000370138e-07
    num_agent_steps_sampled: 752000
    num_agent_steps_trained: 752000
    num_steps_sampled: 752000
    num_steps_trained: 752000
  iterations_since_restore: 752
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,752,18297.7,752000,0,0,0,322.21




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 753000
  custom_metrics: {}
  date: 2021-10-09_03-29-59
  done: false
  episode_len_mean: 323.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2070
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.831509244441986
          entropy_coeff: 0.009999999999999998
          kl: 0.009827844032813528
          policy_loss: -0.04643923425012165
          total_loss: -0.05635777037176821
          vf_explained_var: -0.15363316237926483
          vf_loss: 6.595886557331849e-07
    num_agent_steps_sampled: 753000
    num_agent_steps_trained: 753000
    num_steps_sampled: 753000
    num_steps_trained: 753000
  iterations_since_restore: 753
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,753,18340.4,753000,0,0,0,323.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 754000
  custom_metrics: {}
  date: 2021-10-09_03-30-28
  done: false
  episode_len_mean: 325.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2074
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8133332981003656
          entropy_coeff: 0.009999999999999998
          kl: 0.01160732094550538
          policy_loss: -0.11618759305112891
          total_loss: -0.12440406245489916
          vf_explained_var: -0.4742463529109955
          vf_loss: 7.64299182283897e-07
    num_agent_steps_sampled: 754000
    num_agent_steps_trained: 754000
    num_steps_sampled: 754000
    num_steps_trained: 754000
  iterations_since_restore: 754
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,754,18368.9,754000,0,0,0,325.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 755000
  custom_metrics: {}
  date: 2021-10-09_03-30-58
  done: false
  episode_len_mean: 326.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2077
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.783071047729916
          entropy_coeff: 0.009999999999999998
          kl: 0.01122844105330503
          policy_loss: -0.03218956519332197
          total_loss: -0.04042746004545027
          vf_explained_var: -0.9231888651847839
          vf_loss: 3.9197699474395147e-07
    num_agent_steps_sampled: 755000
    num_agent_steps_trained: 755000
    num_steps_sampled: 755000
    num_steps_trained: 755000
  iterations_since_restore: 755
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,755,18398.5,755000,0,0,0,326.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 756000
  custom_metrics: {}
  date: 2021-10-09_03-31-26
  done: false
  episode_len_mean: 328.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2080
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.792920626534356
          entropy_coeff: 0.009999999999999998
          kl: 0.0089146719865916
          policy_loss: -0.13065373359455001
          total_loss: -0.14096674765977596
          vf_explained_var: -0.7787492275238037
          vf_loss: 4.177085511653584e-07
    num_agent_steps_sampled: 756000
    num_agent_steps_trained: 756000
    num_steps_sampled: 756000
    num_steps_trained: 756000
  iterations_since_restore: 756
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,756,18427,756000,0,0,0,328.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 757000
  custom_metrics: {}
  date: 2021-10-09_03-31-57
  done: false
  episode_len_mean: 326.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2083
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.679903311199612
          entropy_coeff: 0.009999999999999998
          kl: 0.009458776932639128
          policy_loss: -0.09790388393319316
          total_loss: -0.10662186137504048
          vf_explained_var: -0.6749874353408813
          vf_loss: 4.5227234848097295e-07
    num_agent_steps_sampled: 757000
    num_agent_steps_trained: 757000
    num_steps_sampled: 757000
    num_steps_trained: 757000
  iterations_since_restore: 757
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,757,18457.8,757000,0,0,0,326.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 758000
  custom_metrics: {}
  date: 2021-10-09_03-32-28
  done: false
  episode_len_mean: 325.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2087
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.668741810321808
          entropy_coeff: 0.009999999999999998
          kl: 0.010872957445832101
          policy_loss: -0.0968384557714065
          total_loss: -0.10423682588669989
          vf_explained_var: -0.6443342566490173
          vf_loss: 3.143227293852154e-07
    num_agent_steps_sampled: 758000
    num_agent_steps_trained: 758000
    num_steps_sampled: 758000
    num_steps_trained: 758000
  iterations_since_restore: 758
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,758,18489.3,758000,0,0,0,325.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 759000
  custom_metrics: {}
  date: 2021-10-09_03-32-59
  done: false
  episode_len_mean: 323.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2090
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8092007557551066
          entropy_coeff: 0.009999999999999998
          kl: 0.011183388289211867
          policy_loss: -0.05545600404342015
          total_loss: -0.06399364291379848
          vf_explained_var: -0.5179634094238281
          vf_loss: 4.3247143932174164e-07
    num_agent_steps_sampled: 759000
    num_agent_steps_trained: 759000
    num_steps_sampled: 759000
    num_steps_trained: 759000
  iterations_since_restore: 759
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,759,18520.2,759000,0,0,0,323.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 760000
  custom_metrics: {}
  date: 2021-10-09_03-33-27
  done: false
  episode_len_mean: 323.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2093
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5859637392891779
          entropy_coeff: 0.009999999999999998
          kl: 0.010506643437124409
          policy_loss: -0.11742646633647383
          total_loss: -0.12430939165771836
          vf_explained_var: 0.010762609541416168
          vf_loss: 9.221812471575201e-07
    num_agent_steps_sampled: 760000
    num_agent_steps_trained: 760000
    num_steps_sampled: 760000
    num_steps_trained: 760000
  iterations_since_restore: 760
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,760,18547.5,760000,0,0,0,323.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 761000
  custom_metrics: {}
  date: 2021-10-09_03-33-52
  done: false
  episode_len_mean: 323.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2096
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.841993890868293
          entropy_coeff: 0.009999999999999998
          kl: 0.011430107379435114
          policy_loss: -0.10550974003142781
          total_loss: -0.1141644614852137
          vf_explained_var: -1.0
          vf_loss: 5.148485113990218e-07
    num_agent_steps_sampled: 761000
    num_agent_steps_trained: 761000
    num_steps_sampled: 761000
    num_steps_trained: 761000
  iterations_since_restore: 761
  node_ip: 192.168.3.5
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,761,18573.2,761000,0,0,0,323.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 762000
  custom_metrics: {}
  date: 2021-10-09_03-34-20
  done: false
  episode_len_mean: 324.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2099
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9774780604574416
          entropy_coeff: 0.009999999999999998
          kl: 0.012173063002748247
          policy_loss: -0.11739538411299387
          total_loss: -0.12677041846844886
          vf_explained_var: -0.5107607245445251
          vf_loss: 3.374371840910347e-07
    num_agent_steps_sampled: 762000
    num_agent_steps_trained: 762000
    num_steps_sampled: 762000
    num_steps_trained: 762000
  iterations_since_restore: 762
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,762,18601.1,762000,0,0,0,324.18




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 763000
  custom_metrics: {}
  date: 2021-10-09_03-35-07
  done: false
  episode_len_mean: 324.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2102
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8444464908705818
          entropy_coeff: 0.009999999999999998
          kl: 0.011294743552635841
          policy_loss: -0.0859597347676754
          total_loss: -0.09475475572463539
          vf_explained_var: -0.4872480034828186
          vf_loss: 3.837107472451054e-07
    num_agent_steps_sampled: 763000
    num_agent_steps_trained: 763000
    num_steps_sampled: 763000
    num_steps_trained: 763000
  iterations_since_restore: 763
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,763,18647.8,763000,0,0,0,324.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 764000
  custom_metrics: {}
  date: 2021-10-09_03-35-33
  done: false
  episode_len_mean: 323.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2106
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.806043146716224
          entropy_coeff: 0.009999999999999998
          kl: 0.013209376142786016
          policy_loss: -0.042181867650813526
          total_loss: -0.048956222542458114
          vf_explained_var: -0.491035133600235
          vf_loss: 1.3465136027611556e-06
    num_agent_steps_sampled: 764000
    num_agent_steps_trained: 764000
    num_steps_sampled: 764000
    num_steps_trained: 764000
  iterations_since_restore: 764
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,764,18673.5,764000,0,0,0,323.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 765000
  custom_metrics: {}
  date: 2021-10-09_03-35-59
  done: false
  episode_len_mean: 323.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2109
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5244077444076538
          entropy_coeff: 0.009999999999999998
          kl: 0.011801134318871772
          policy_loss: -0.02779403527577718
          total_loss: -0.03295497807363669
          vf_explained_var: -0.36402252316474915
          vf_loss: 1.4618035256338164e-06
    num_agent_steps_sampled: 765000
    num_agent_steps_trained: 765000
    num_steps_sampled: 765000
    num_steps_trained: 765000
  iterations_since_restore: 765
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,765,18699.9,765000,0,0,0,323.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 766000
  custom_metrics: {}
  date: 2021-10-09_03-36-26
  done: false
  episode_len_mean: 322.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2112
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5175862471262613
          entropy_coeff: 0.009999999999999998
          kl: 0.010920637669631637
          policy_loss: -0.06053648342688878
          total_loss: -0.0663143997391065
          vf_explained_var: -1.0
          vf_loss: 6.847833462997086e-05
    num_agent_steps_sampled: 766000
    num_agent_steps_trained: 766000
    num_steps_sampled: 766000
    num_steps_trained: 766000
  iterations_since_restore: 766
  node_ip: 192.168.3.5
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,766,18727.2,766000,0,0,0,322.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 767000
  custom_metrics: {}
  date: 2021-10-09_03-36-51
  done: false
  episode_len_mean: 321.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2115
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8212525553173489
          entropy_coeff: 0.009999999999999998
          kl: 0.010821180569144682
          policy_loss: -0.13708557047777706
          total_loss: -0.1460496282411946
          vf_explained_var: 0.5343307852745056
          vf_loss: 3.964166978044482e-06
    num_agent_steps_sampled: 767000
    num_agent_steps_trained: 767000
    num_steps_sampled: 767000
    num_steps_trained: 767000
  iterations_since_restore: 767
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,767,18752.1,767000,0,0,0,321.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 768000
  custom_metrics: {}
  date: 2021-10-09_03-37-14
  done: false
  episode_len_mean: 323.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2118
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6905681702825759
          entropy_coeff: 0.009999999999999998
          kl: 0.012074907445930864
          policy_loss: -0.05501735905806224
          total_loss: -0.06160464220576816
          vf_explained_var: 0.02705862559378147
          vf_loss: 2.844136875208076e-06
    num_agent_steps_sampled: 768000
    num_agent_steps_trained: 768000
    num_steps_sampled: 768000
    num_steps_trained: 768000
  iterations_since_restore: 768
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,768,18775,768000,0,0,0,323.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 769000
  custom_metrics: {}
  date: 2021-10-09_03-37-43
  done: false
  episode_len_mean: 321.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2122
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5431835929552713
          entropy_coeff: 0.009999999999999998
          kl: 0.010750159688576764
          policy_loss: -0.1269443008220858
          total_loss: -0.13294727090332242
          vf_explained_var: -0.8809951543807983
          vf_loss: 0.00024503601354505615
    num_agent_steps_sampled: 769000
    num_agent_steps_trained: 769000
    num_steps_sampled: 769000
    num_steps_trained: 769000
  iterations_since_restore: 769
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,769,18803.3,769000,0,0,0,321.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 770000
  custom_metrics: {}
  date: 2021-10-09_03-38-09
  done: false
  episode_len_mean: 320.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2125
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7294867277145385
          entropy_coeff: 0.009999999999999998
          kl: 0.012604562753759277
          policy_loss: -0.07316613060732682
          total_loss: -0.07967325668368075
          vf_explained_var: 0.19130903482437134
          vf_loss: 1.970184677146689e-05
    num_agent_steps_sampled: 770000
    num_agent_steps_trained: 770000
    num_steps_sampled: 770000
    num_steps_trained: 770000
  iterations_since_restore: 770
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,770,18829.2,770000,0,0,0,320.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 771000
  custom_metrics: {}
  date: 2021-10-09_03-38-36
  done: false
  episode_len_mean: 318.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2128
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6264138831032646
          entropy_coeff: 0.009999999999999998
          kl: 0.009673251791053812
          policy_loss: -0.0814509325557285
          total_loss: -0.08944683443340991
          vf_explained_var: 0.10414735972881317
          vf_loss: 4.409176115890053e-06
    num_agent_steps_sampled: 771000
    num_agent_steps_trained: 771000
    num_steps_sampled: 771000
    num_steps_trained: 771000
  iterations_since_restore: 771
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,771,18857,771000,0,0,0,318.51




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 772000
  custom_metrics: {}
  date: 2021-10-09_03-39-24
  done: false
  episode_len_mean: 316.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2132
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4900380664401585
          entropy_coeff: 0.009999999999999998
          kl: 0.014570935720327485
          policy_loss: -0.12175646407736672
          total_loss: -0.12419611629512575
          vf_explained_var: 0.1785680204629898
          vf_loss: 1.2822072196488281e-05
    num_agent_steps_sampled: 772000
    num_agent_steps_trained: 772000
    num_steps_sampled: 772000
    num_steps_trained: 772000
  iterations_since_restore: 772
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,772,18904.3,772000,0,0,0,316.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 773000
  custom_metrics: {}
  date: 2021-10-09_03-39-52
  done: false
  episode_len_mean: 314.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2135
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.617617991235521
          entropy_coeff: 0.009999999999999998
          kl: 0.01080627169958714
          policy_loss: -0.13306364255646866
          total_loss: -0.14000655569963985
          vf_explained_var: 0.40648162364959717
          vf_loss: 1.501142677777049e-06
    num_agent_steps_sampled: 773000
    num_agent_steps_trained: 773000
    num_steps_sampled: 773000
    num_steps_trained: 773000
  iterations_since_restore: 773
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,773,18933,773000,0,0,0,314.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 774000
  custom_metrics: {}
  date: 2021-10-09_03-40-18
  done: false
  episode_len_mean: 314.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2138
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5508523821830749
          entropy_coeff: 0.009999999999999998
          kl: 0.011220533667922004
          policy_loss: -0.0589353781607416
          total_loss: -0.06485466824637519
          vf_explained_var: 0.5511677265167236
          vf_loss: 3.566256158491241e-06
    num_agent_steps_sampled: 774000
    num_agent_steps_trained: 774000
    num_steps_sampled: 774000
    num_steps_trained: 774000
  iterations_since_restore: 774
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,774,18958.7,774000,0,0,0,314.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 775000
  custom_metrics: {}
  date: 2021-10-09_03-40-47
  done: false
  episode_len_mean: 314.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2142
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8009251475334167
          entropy_coeff: 0.009999999999999998
          kl: 0.014082210717909919
          policy_loss: -0.06367634901156029
          total_loss: -0.06965254118873013
          vf_explained_var: -0.5217905640602112
          vf_loss: 2.6727863718810518e-06
    num_agent_steps_sampled: 775000
    num_agent_steps_trained: 775000
    num_steps_sampled: 775000
    num_steps_trained: 775000
  iterations_since_restore: 775
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,775,18987.2,775000,0,0,0,314.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 776000
  custom_metrics: {}
  date: 2021-10-09_03-41-13
  done: false
  episode_len_mean: 314.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2145
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5517848465177748
          entropy_coeff: 0.009999999999999998
          kl: 0.011451266507196243
          policy_loss: -0.10821523569110367
          total_loss: -0.11344670795111192
          vf_explained_var: -0.8648774027824402
          vf_loss: 0.0005035948758493052
    num_agent_steps_sampled: 776000
    num_agent_steps_trained: 776000
    num_steps_sampled: 776000
    num_steps_trained: 776000
  iterations_since_restore: 776
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,776,19013.4,776000,0,0,0,314.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 777000
  custom_metrics: {}
  date: 2021-10-09_03-41-39
  done: false
  episode_len_mean: 315.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2148
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5897403915723165
          entropy_coeff: 0.009999999999999998
          kl: 0.011313747992787173
          policy_loss: -0.08389226854261425
          total_loss: -0.0900897841486666
          vf_explained_var: 0.11080636084079742
          vf_loss: 3.4588999490855106e-05
    num_agent_steps_sampled: 777000
    num_agent_steps_trained: 777000
    num_steps_sampled: 777000
    num_steps_trained: 777000
  iterations_since_restore: 777
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,777,19039.8,777000,0,0,0,315.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 778000
  custom_metrics: {}
  date: 2021-10-09_03-42-03
  done: false
  episode_len_mean: 315.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2151
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5868738929430644
          entropy_coeff: 0.009999999999999998
          kl: 0.012899421114184169
          policy_loss: -0.08191186185512278
          total_loss: -0.08674808020393053
          vf_explained_var: 0.44610628485679626
          vf_loss: 1.2585785553811648e-05
    num_agent_steps_sampled: 778000
    num_agent_steps_trained: 778000
    num_steps_sampled: 778000
    num_steps_trained: 778000
  iterations_since_restore: 778
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,778,19063.9,778000,0,0,0,315.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 779000
  custom_metrics: {}
  date: 2021-10-09_03-42-28
  done: false
  episode_len_mean: 316.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2154
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.630061717828115
          entropy_coeff: 0.009999999999999998
          kl: 0.011180775227076998
          policy_loss: -0.05755988624360826
          total_loss: -0.0642961839834849
          vf_explained_var: 0.1323682963848114
          vf_loss: 1.2618287451005017e-05
    num_agent_steps_sampled: 779000
    num_agent_steps_trained: 779000
    num_steps_sampled: 779000
    num_steps_trained: 779000
  iterations_since_restore: 779
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,779,19088.6,779000,0,0,0,316.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 780000
  custom_metrics: {}
  date: 2021-10-09_03-42-55
  done: false
  episode_len_mean: 315.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2157
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6375607146157158
          entropy_coeff: 0.009999999999999998
          kl: 0.012177396585301798
          policy_loss: -0.0747776468594869
          total_loss: -0.08074518951276938
          vf_explained_var: -0.10805872082710266
          vf_loss: 4.955171050369245e-06
    num_agent_steps_sampled: 780000
    num_agent_steps_trained: 780000
    num_steps_sampled: 780000
    num_steps_trained: 780000
  iterations_since_restore: 780
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,780,19115.3,780000,0,0,0,315.2




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 781000
  custom_metrics: {}
  date: 2021-10-09_03-43-35
  done: false
  episode_len_mean: 315.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2160
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6253913349575466
          entropy_coeff: 0.009999999999999998
          kl: 0.011033776221442141
          policy_loss: -0.12413512567679087
          total_loss: -0.13095866048501598
          vf_explained_var: 0.721509575843811
          vf_loss: 4.2573274249156784e-06
    num_agent_steps_sampled: 781000
    num_agent_steps_trained: 781000
    num_steps_sampled: 781000
    num_steps_trained: 781000
  iterations_since_restore: 781
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,781,19156,781000,0,0,0,315.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 782000
  custom_metrics: {}
  date: 2021-10-09_03-44-04
  done: false
  episode_len_mean: 314.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2163
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7338624040285746
          entropy_coeff: 0.009999999999999998
          kl: 0.014561819439807284
          policy_loss: -0.07861331916517682
          total_loss: -0.08350918682085143
          vf_explained_var: 0.32858720421791077
          vf_loss: 2.6410374238771006e-06
    num_agent_steps_sampled: 782000
    num_agent_steps_trained: 782000
    num_steps_sampled: 782000
    num_steps_trained: 782000
  iterations_since_restore: 782
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,782,19184.6,782000,0,0,0,314.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 783000
  custom_metrics: {}
  date: 2021-10-09_03-44-30
  done: false
  episode_len_mean: 313.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2166
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6189279423819647
          entropy_coeff: 0.009999999999999998
          kl: 0.008806001018881965
          policy_loss: 0.01074545087499751
          total_loss: 0.002081656963047054
          vf_explained_var: 0.16335701942443848
          vf_loss: 2.5456937932075462e-06
    num_agent_steps_sampled: 783000
    num_agent_steps_trained: 783000
    num_steps_sampled: 783000
    num_steps_trained: 783000
  iterations_since_restore: 783
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,783,19210,783000,0,0,0,313.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 784000
  custom_metrics: {}
  date: 2021-10-09_03-44-52
  done: false
  episode_len_mean: 314.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2169
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6811680767271253
          entropy_coeff: 0.009999999999999998
          kl: 0.013295448198695325
          policy_loss: -0.02770413060983022
          total_loss: -0.033155840552515456
          vf_explained_var: 0.49653807282447815
          vf_loss: 1.7091365773719896e-06
    num_agent_steps_sampled: 784000
    num_agent_steps_trained: 784000
    num_steps_sampled: 784000
    num_steps_trained: 784000
  iterations_since_restore: 784
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,784,19232.9,784000,0,0,0,314.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 785000
  custom_metrics: {}
  date: 2021-10-09_03-45-20
  done: false
  episode_len_mean: 314.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2172
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5795822421709695
          entropy_coeff: 0.009999999999999998
          kl: 0.006447785028678786
          policy_loss: -0.0998064255134927
          total_loss: -0.11009246760772334
          vf_explained_var: -0.08120222389698029
          vf_loss: 1.4584988826350632e-06
    num_agent_steps_sampled: 785000
    num_agent_steps_trained: 785000
    num_steps_sampled: 785000
    num_steps_trained: 785000
  iterations_since_restore: 785
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,785,19260.5,785000,0,0,0,314.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 786000
  custom_metrics: {}
  date: 2021-10-09_03-45-46
  done: false
  episode_len_mean: 314.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2175
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6054238239924112
          entropy_coeff: 0.009999999999999998
          kl: 0.011505366077236682
          policy_loss: -0.07487168096833759
          total_loss: -0.08109506890177727
          vf_explained_var: -0.061049364507198334
          vf_loss: 1.852468418898449e-06
    num_agent_steps_sampled: 786000
    num_agent_steps_trained: 786000
    num_steps_sampled: 786000
    num_steps_trained: 786000
  iterations_since_restore: 786
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,786,19286.8,786000,0,0,0,314.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 787000
  custom_metrics: {}
  date: 2021-10-09_03-46-14
  done: false
  episode_len_mean: 314.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2179
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8567001422246296
          entropy_coeff: 0.009999999999999998
          kl: 0.011809588915988932
          policy_loss: -0.11445940329382817
          total_loss: -0.12293633278459311
          vf_explained_var: -0.1881825178861618
          vf_loss: 1.1791940102688791e-06
    num_agent_steps_sampled: 787000
    num_agent_steps_trained: 787000
    num_steps_sampled: 787000
    num_steps_trained: 787000
  iterations_since_restore: 787
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,787,19314.6,787000,0,0,0,314.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 788000
  custom_metrics: {}
  date: 2021-10-09_03-46-43
  done: false
  episode_len_mean: 313.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2182
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9076734913720026
          entropy_coeff: 0.009999999999999998
          kl: 0.011421939324984908
          policy_loss: -0.06716158632189036
          total_loss: -0.07647966800464524
          vf_explained_var: -0.21353334188461304
          vf_loss: 9.251711907786052e-07
    num_agent_steps_sampled: 788000
    num_agent_steps_trained: 788000
    num_steps_sampled: 788000
    num_steps_trained: 788000
  iterations_since_restore: 788
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,788,19342.9,788000,0,0,0,313.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 789000
  custom_metrics: {}
  date: 2021-10-09_03-47-08
  done: false
  episode_len_mean: 315.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2185
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8743728280067444
          entropy_coeff: 0.009999999999999998
          kl: 0.010366171186508883
          policy_loss: -0.08842978071835306
          total_loss: -0.09831674438383844
          vf_explained_var: -0.5647748112678528
          vf_loss: 9.77160465254201e-07
    num_agent_steps_sampled: 789000
    num_agent_steps_trained: 789000
    num_steps_sampled: 789000
    num_steps_trained: 789000
  iterations_since_restore: 789
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,789,19368.9,789000,0,0,0,315.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 790000
  custom_metrics: {}
  date: 2021-10-09_03-47-33
  done: false
  episode_len_mean: 315.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2188
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.605945756700304
          entropy_coeff: 0.009999999999999998
          kl: 0.01036495848065218
          policy_loss: -0.08282378255907032
          total_loss: -0.09002731167193916
          vf_explained_var: -0.37264490127563477
          vf_loss: 1.179551159364362e-06
    num_agent_steps_sampled: 790000
    num_agent_steps_trained: 790000
    num_steps_sampled: 790000
    num_steps_trained: 790000
  iterations_since_restore: 790
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,790,19393.3,790000,0,0,0,315.66




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 791000
  custom_metrics: {}
  date: 2021-10-09_03-48-11
  done: false
  episode_len_mean: 318.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2190
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.2879401273197597
          entropy_coeff: 0.009999999999999998
          kl: 0.009765914291842264
          policy_loss: -0.12170433704223897
          total_loss: -0.1262392177556952
          vf_explained_var: -0.4924606680870056
          vf_loss: 1.5321668090311707e-06
    num_agent_steps_sampled: 791000
    num_agent_steps_trained: 791000
    num_steps_sampled: 791000
    num_steps_trained: 791000
  iterations_since_restore: 791
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,791,19431.3,791000,0,0,0,318.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 792000
  custom_metrics: {}
  date: 2021-10-09_03-48-38
  done: false
  episode_len_mean: 317.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2194
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.602612316608429
          entropy_coeff: 0.009999999999999998
          kl: 0.010323610828894578
          policy_loss: -0.016494466695520613
          total_loss: -0.02369995228946209
          vf_explained_var: -0.26845425367355347
          vf_loss: 1.2076675978455064e-06
    num_agent_steps_sampled: 792000
    num_agent_steps_trained: 792000
    num_steps_sampled: 792000
    num_steps_trained: 792000
  iterations_since_restore: 792
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,792,19458.1,792000,0,0,0,317.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 793000
  custom_metrics: {}
  date: 2021-10-09_03-49-03
  done: false
  episode_len_mean: 317.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2197
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8099547717306348
          entropy_coeff: 0.009999999999999998
          kl: 0.011339118831824358
          policy_loss: -0.0646489438497358
          total_loss: -0.07306080297049548
          vf_explained_var: -0.8194042444229126
          vf_loss: 7.177664833256535e-07
    num_agent_steps_sampled: 793000
    num_agent_steps_trained: 793000
    num_steps_sampled: 793000
    num_steps_trained: 793000
  iterations_since_restore: 793
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,793,19483.7,793000,0,0,0,317.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 794000
  custom_metrics: {}
  date: 2021-10-09_03-49-30
  done: false
  episode_len_mean: 316.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2200
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6886378500196668
          entropy_coeff: 0.009999999999999998
          kl: 0.007076274427030822
          policy_loss: -0.01259294371638033
          total_loss: -0.023433645069599152
          vf_explained_var: -0.8506650924682617
          vf_loss: 4.3889057018810086e-07
    num_agent_steps_sampled: 794000
    num_agent_steps_trained: 794000
    num_steps_sampled: 794000
    num_steps_trained: 794000
  iterations_since_restore: 794
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,794,19510.7,794000,0,0,0,316.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 795000
  custom_metrics: {}
  date: 2021-10-09_03-49-56
  done: false
  episode_len_mean: 318.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2203
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6696473757425945
          entropy_coeff: 0.009999999999999998
          kl: 0.011284758751572829
          policy_loss: -0.08316960438258118
          total_loss: -0.09022497936255403
          vf_explained_var: -0.4438120126724243
          vf_loss: 5.642354546845733e-07
    num_agent_steps_sampled: 795000
    num_agent_steps_trained: 795000
    num_steps_sampled: 795000
    num_steps_trained: 795000
  iterations_since_restore: 795
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,795,19536.2,795000,0,0,0,318.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 796000
  custom_metrics: {}
  date: 2021-10-09_03-50-25
  done: false
  episode_len_mean: 317.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2207
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7002815471755133
          entropy_coeff: 0.009999999999999998
          kl: 0.0080088066472975
          policy_loss: -0.027785642652048005
          total_loss: -0.03794623956912094
          vf_explained_var: -0.466817170381546
          vf_loss: 3.18877939283387e-07
    num_agent_steps_sampled: 796000
    num_agent_steps_trained: 796000
    num_steps_sampled: 796000
    num_steps_trained: 796000
  iterations_since_restore: 796
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,796,19565.2,796000,0,0,0,317.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 797000
  custom_metrics: {}
  date: 2021-10-09_03-50-52
  done: false
  episode_len_mean: 316.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2210
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.566080203321245
          entropy_coeff: 0.009999999999999998
          kl: 0.011497288874473632
          policy_loss: -0.04868770738442739
          total_loss: -0.05452591251167986
          vf_explained_var: -0.4863876402378082
          vf_loss: 4.972155860766886e-07
    num_agent_steps_sampled: 797000
    num_agent_steps_trained: 797000
    num_steps_sampled: 797000
    num_steps_trained: 797000
  iterations_since_restore: 797
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,797,19591.7,797000,0,0,0,316.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 798000
  custom_metrics: {}
  date: 2021-10-09_03-51-18
  done: false
  episode_len_mean: 317.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2213
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7536285175217523
          entropy_coeff: 0.009999999999999998
          kl: 0.012775247095250563
          policy_loss: -0.05031759486430221
          total_loss: -0.056939233715335526
          vf_explained_var: -0.5631661415100098
          vf_loss: 7.919292428937296e-07
    num_agent_steps_sampled: 798000
    num_agent_steps_trained: 798000
    num_steps_sampled: 798000
    num_steps_trained: 798000
  iterations_since_restore: 798
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,798,19618.5,798000,0,0,0,317.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 799000
  custom_metrics: {}
  date: 2021-10-09_03-51-44
  done: false
  episode_len_mean: 317.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2216
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9158896525700888
          entropy_coeff: 0.009999999999999998
          kl: 0.013343439905459656
          policy_loss: -0.03377076685428619
          total_loss: -0.04152976837423113
          vf_explained_var: -0.15570122003555298
          vf_loss: 6.349319608350218e-07
    num_agent_steps_sampled: 799000
    num_agent_steps_trained: 799000
    num_steps_sampled: 799000
    num_steps_trained: 799000
  iterations_since_restore: 799
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,799,19643.9,799000,0,0,0,317.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 800000
  custom_metrics: {}
  date: 2021-10-09_03-52-09
  done: false
  episode_len_mean: 317.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2219
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7942006376054551
          entropy_coeff: 0.009999999999999998
          kl: 0.009480770637618303
          policy_loss: -0.07553104828629228
          total_loss: -0.08537318896916178
          vf_explained_var: -0.8018102645874023
          vf_loss: 4.7274705063197566e-07
    num_agent_steps_sampled: 800000
    num_agent_steps_trained: 800000
    num_steps_sampled: 800000
    num_steps_trained: 800000
  iterations_since_restore: 800
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,800,19669.5,800000,0,0,0,317.09




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 801000
  custom_metrics: {}
  date: 2021-10-09_03-52-53
  done: false
  episode_len_mean: 318.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2222
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7490414884355334
          entropy_coeff: 0.009999999999999998
          kl: 0.012183558174927691
          policy_loss: -0.06675747450855044
          total_loss: -0.0738388750081261
          vf_explained_var: -0.2548377215862274
          vf_loss: 6.395784983700044e-07
    num_agent_steps_sampled: 801000
    num_agent_steps_trained: 801000
    num_steps_sampled: 801000
    num_steps_trained: 801000
  iterations_since_restore: 801
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,801,19712.8,801000,0,0,0,318.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 802000
  custom_metrics: {}
  date: 2021-10-09_03-53-13
  done: false
  episode_len_mean: 320.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2224
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 0.9670941087934706
          entropy_coeff: 0.009999999999999998
          kl: 0.0066563310005941885
          policy_loss: -0.1474402968461315
          total_loss: -0.15124681409862306
          vf_explained_var: -0.4158344566822052
          vf_loss: 0.00017794132933456947
    num_agent_steps_sampled: 802000
    num_agent_steps_trained: 802000
    num_steps_sampled: 802000
    num_steps_trained: 802000
  iterations_since_restore: 802
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,802,19733.6,802000,0,0,0,320.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 803000
  custom_metrics: {}
  date: 2021-10-09_03-53-39
  done: false
  episode_len_mean: 322.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2227
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9149847202830845
          entropy_coeff: 0.009999999999999998
          kl: 0.01358877520352492
          policy_loss: -0.1024209413677454
          total_loss: -0.10995662899480926
          vf_explained_var: -0.7270368933677673
          vf_loss: 5.313335338996694e-06
    num_agent_steps_sampled: 803000
    num_agent_steps_trained: 803000
    num_steps_sampled: 803000
    num_steps_trained: 803000
  iterations_since_restore: 803
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,803,19758.8,803000,0,0,0,322.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 804000
  custom_metrics: {}
  date: 2021-10-09_03-54-05
  done: false
  episode_len_mean: 323.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2231
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9853631496429442
          entropy_coeff: 0.009999999999999998
          kl: 0.011041256217395764
          policy_loss: -0.06041907403204176
          total_loss: -0.0708380085726579
          vf_explained_var: -0.4102773070335388
          vf_loss: 2.185210111823229e-06
    num_agent_steps_sampled: 804000
    num_agent_steps_trained: 804000
    num_steps_sampled: 804000
    num_steps_trained: 804000
  iterations_since_restore: 804
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,804,19785.3,804000,0,0,0,323.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 805000
  custom_metrics: {}
  date: 2021-10-09_03-54-31
  done: false
  episode_len_mean: 324.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2234
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9161228603786893
          entropy_coeff: 0.009999999999999998
          kl: 0.011026805280409599
          policy_loss: -0.07302741677396828
          total_loss: -0.08276726653178533
          vf_explained_var: -0.05073324963450432
          vf_loss: 1.2164496234213907e-06
    num_agent_steps_sampled: 805000
    num_agent_steps_trained: 805000
    num_steps_sampled: 805000
    num_steps_trained: 805000
  iterations_since_restore: 805
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,805,19811.4,805000,0,0,0,324.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 806000
  custom_metrics: {}
  date: 2021-10-09_03-54-55
  done: false
  episode_len_mean: 325.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2236
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9010013421376546
          entropy_coeff: 0.009999999999999998
          kl: 0.015152156035604995
          policy_loss: -0.06297090351581573
          total_loss: -0.06903539266851214
          vf_explained_var: -0.2739347815513611
          vf_loss: 1.0833048640456076e-06
    num_agent_steps_sampled: 806000
    num_agent_steps_trained: 806000
    num_steps_sampled: 806000
    num_steps_trained: 806000
  iterations_since_restore: 806
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,806,19835,806000,0,0,0,325.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 807000
  custom_metrics: {}
  date: 2021-10-09_03-55-20
  done: false
  episode_len_mean: 326.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2239
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9939960969818964
          entropy_coeff: 0.009999999999999998
          kl: 0.009679813721751805
          policy_loss: -0.1241179874787728
          total_loss: -0.13578778567413488
          vf_explained_var: -0.8060210347175598
          vf_loss: 7.284469575097319e-07
    num_agent_steps_sampled: 807000
    num_agent_steps_trained: 807000
    num_steps_sampled: 807000
    num_steps_trained: 807000
  iterations_since_restore: 807
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,807,19860.4,807000,0,0,0,326.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 808000
  custom_metrics: {}
  date: 2021-10-09_03-55-46
  done: false
  episode_len_mean: 327.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2242
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 2.019661237133874
          entropy_coeff: 0.009999999999999998
          kl: 0.012566317870766279
          policy_loss: -0.04609036602907711
          total_loss: -0.055551037368261155
          vf_explained_var: -0.304606556892395
          vf_loss: 5.76803127690558e-07
    num_agent_steps_sampled: 808000
    num_agent_steps_trained: 808000
    num_steps_sampled: 808000
    num_steps_trained: 808000
  iterations_since_restore: 808
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,808,19885.7,808000,0,0,0,327.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 809000
  custom_metrics: {}
  date: 2021-10-09_03-56-13
  done: false
  episode_len_mean: 327.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2246
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7093641837437947
          entropy_coeff: 0.009999999999999998
          kl: 0.011074223383992117
          policy_loss: -0.05848925453093317
          total_loss: -0.06612177793350485
          vf_explained_var: -0.474412202835083
          vf_loss: 4.4457279623581296e-07
    num_agent_steps_sampled: 809000
    num_agent_steps_trained: 809000
    num_steps_sampled: 809000
    num_steps_trained: 809000
  iterations_since_restore: 809
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,809,19912.9,809000,0,0,0,327.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 810000
  custom_metrics: {}
  date: 2021-10-09_03-56-38
  done: false
  episode_len_mean: 328.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2249
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.90539118581348
          entropy_coeff: 0.009999999999999998
          kl: 0.015172000594517608
          policy_loss: -0.06849245408342944
          total_loss: -0.07458424725466305
          vf_explained_var: -0.7080700993537903
          vf_loss: 7.298531582162266e-07
    num_agent_steps_sampled: 810000
    num_agent_steps_trained: 810000
    num_steps_sampled: 810000
    num_steps_trained: 810000
  iterations_since_restore: 810
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,810,19938.1,810000,0,0,0,328.33




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 811000
  custom_metrics: {}
  date: 2021-10-09_03-57-18
  done: false
  episode_len_mean: 327.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2251
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 2.022738062010871
          entropy_coeff: 0.009999999999999998
          kl: 0.00808605758729191
          policy_loss: -0.11852348301973607
          total_loss: -0.13184235176692408
          vf_explained_var: -0.6703320145606995
          vf_loss: 6.179835730159135e-07
    num_agent_steps_sampled: 811000
    num_agent_steps_trained: 811000
    num_steps_sampled: 811000
    num_steps_trained: 811000
  iterations_since_restore: 811
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,811,19978.1,811000,0,0,0,327.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 812000
  custom_metrics: {}
  date: 2021-10-09_03-57-42
  done: false
  episode_len_mean: 328.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2254
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7908495651351082
          entropy_coeff: 0.009999999999999998
          kl: 0.013874893235831385
          policy_loss: -0.07804875928494666
          total_loss: -0.0841034431838327
          vf_explained_var: 0.03604593127965927
          vf_loss: 5.360058919576558e-07
    num_agent_steps_sampled: 812000
    num_agent_steps_trained: 812000
    num_steps_sampled: 812000
    num_steps_trained: 812000
  iterations_since_restore: 812
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,812,20002.1,812000,0,0,0,328.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 813000
  custom_metrics: {}
  date: 2021-10-09_03-58-00
  done: false
  episode_len_mean: 332.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2256
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 0.8250833643807305
          entropy_coeff: 0.009999999999999998
          kl: 0.012984269127069226
          policy_loss: -0.11049054314692815
          total_loss: -0.1074163352449735
          vf_explained_var: -0.7496927976608276
          vf_loss: 0.00023262434234538887
    num_agent_steps_sampled: 813000
    num_agent_steps_trained: 813000
    num_steps_sampled: 813000
    num_steps_trained: 813000
  iterations_since_restore: 813
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,813,20019.6,813000,0,0,0,332.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 814000
  custom_metrics: {}
  date: 2021-10-09_03-58-20
  done: false
  episode_len_mean: 333.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2258
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.720798585149977
          entropy_coeff: 0.009999999999999998
          kl: 0.010365080590528288
          policy_loss: -0.03837754084832139
          total_loss: -0.04672627012348837
          vf_explained_var: -0.4139130413532257
          vf_loss: 4.403313535779792e-06
    num_agent_steps_sampled: 814000
    num_agent_steps_trained: 814000
    num_steps_sampled: 814000
    num_steps_trained: 814000
  iterations_since_restore: 814
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,814,20039.6,814000,0,0,0,333.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 815000
  custom_metrics: {}
  date: 2021-10-09_03-58-44
  done: false
  episode_len_mean: 335.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2261
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8202497190899318
          entropy_coeff: 0.009999999999999998
          kl: 0.012337445290138523
          policy_loss: -0.06395655588971244
          total_loss: -0.07161813916431534
          vf_explained_var: -0.3616916537284851
          vf_loss: 1.072148816660956e-06
    num_agent_steps_sampled: 815000
    num_agent_steps_trained: 815000
    num_steps_sampled: 815000
    num_steps_trained: 815000
  iterations_since_restore: 815
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,815,20064.2,815000,0,0,0,335.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 816000
  custom_metrics: {}
  date: 2021-10-09_03-59-07
  done: false
  episode_len_mean: 336.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2264
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9487299097908868
          entropy_coeff: 0.009999999999999998
          kl: 0.016252148025314293
          policy_loss: -0.07468998920068973
          total_loss: -0.08028773168722789
          vf_explained_var: -0.40645718574523926
          vf_loss: 5.396634332378704e-06
    num_agent_steps_sampled: 816000
    num_agent_steps_trained: 816000
    num_steps_sampled: 816000
    num_steps_trained: 816000
  iterations_since_restore: 816
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,816,20086.9,816000,0,0,0,336.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 817000
  custom_metrics: {}
  date: 2021-10-09_03-59-32
  done: false
  episode_len_mean: 337.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2267
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 2.0392070213953652
          entropy_coeff: 0.009999999999999998
          kl: 0.012557839218769828
          policy_loss: -0.03491094290382332
          total_loss: -0.04457418049375216
          vf_explained_var: -0.6303195357322693
          vf_loss: 7.120569787528236e-07
    num_agent_steps_sampled: 817000
    num_agent_steps_trained: 817000
    num_steps_sampled: 817000
    num_steps_trained: 817000
  iterations_since_restore: 817
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,817,20111.5,817000,0,0,0,337.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 818000
  custom_metrics: {}
  date: 2021-10-09_03-59-57
  done: false
  episode_len_mean: 336.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2270
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 2.0212714354197185
          entropy_coeff: 0.009999999999999998
          kl: 0.011305676418595489
          policy_loss: -0.10494958381685945
          total_loss: -0.11550342829691039
          vf_explained_var: -0.6498931646347046
          vf_loss: 4.691138450930844e-07
    num_agent_steps_sampled: 818000
    num_agent_steps_trained: 818000
    num_steps_sampled: 818000
    num_steps_trained: 818000
  iterations_since_restore: 818
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,818,20136.8,818000,0,0,0,336.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 819000
  custom_metrics: {}
  date: 2021-10-09_04-00-20
  done: false
  episode_len_mean: 338.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2273
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8646890242894492
          entropy_coeff: 0.009999999999999998
          kl: 0.010469672350197436
          policy_loss: -0.013818387583725982
          total_loss: -0.023520245775580408
          vf_explained_var: -0.4498539865016937
          vf_loss: 8.25285731151679e-07
    num_agent_steps_sampled: 819000
    num_agent_steps_trained: 819000
    num_steps_sampled: 819000
    num_steps_trained: 819000
  iterations_since_restore: 819
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,819,20160.2,819000,0,0,0,338.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 820000
  custom_metrics: {}
  date: 2021-10-09_04-00-43
  done: false
  episode_len_mean: 339.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2275
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 2.100227395693461
          entropy_coeff: 0.009999999999999998
          kl: 0.01135896289519523
          policy_loss: -0.06507526420884663
          total_loss: -0.0763731306625737
          vf_explained_var: -0.42240190505981445
          vf_loss: 4.796839299893286e-07
    num_agent_steps_sampled: 820000
    num_agent_steps_trained: 820000
    num_steps_sampled: 820000
    num_steps_trained: 820000
  iterations_since_restore: 820
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,820,20183.2,820000,0,0,0,339.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 821000
  custom_metrics: {}
  date: 2021-10-09_04-01-05
  done: false
  episode_len_mean: 342.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2278
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7806205060746936
          entropy_coeff: 0.009999999999999998
          kl: 0.013189357560160896
          policy_loss: -0.06291176130374272
          total_loss: -0.0694496684604221
          vf_explained_var: -0.6175596714019775
          vf_loss: 6.696577075773449e-07
    num_agent_steps_sampled: 821000
    num_agent_steps_trained: 821000
    num_steps_sampled: 821000
    num_steps_trained: 821000
  iterations_since_restore: 821
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,821,20204.9,821000,0,0,0,342.61




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 822000
  custom_metrics: {}
  date: 2021-10-09_04-01-40
  done: false
  episode_len_mean: 345.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2280
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4524589803483752
          entropy_coeff: 0.009999999999999998
          kl: 0.008156338216024198
          policy_loss: 0.09167120402885807
          total_loss: 0.08411929853674438
          vf_explained_var: -0.14696848392486572
          vf_loss: 4.74916696507282e-06
    num_agent_steps_sampled: 822000
    num_agent_steps_trained: 822000
    num_steps_sampled: 822000
    num_steps_trained: 822000
  iterations_since_restore: 822
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,822,20239.5,822000,0,0,0,345.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 823000
  custom_metrics: {}
  date: 2021-10-09_04-02-06
  done: false
  episode_len_mean: 346.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2283
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7902739617559644
          entropy_coeff: 0.009999999999999998
          kl: 0.012147252623303552
          policy_loss: -0.061732406417528786
          total_loss: -0.06919125119845072
          vf_explained_var: -0.9519087672233582
          vf_loss: 6.653880611919602e-05
    num_agent_steps_sampled: 823000
    num_agent_steps_trained: 823000
    num_steps_sampled: 823000
    num_steps_trained: 823000
  iterations_since_restore: 823
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,823,20265.8,823000,0,0,0,346.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 824000
  custom_metrics: {}
  date: 2021-10-09_04-02-27
  done: false
  episode_len_mean: 347.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2285
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8130748020278082
          entropy_coeff: 0.009999999999999998
          kl: 0.011404538587389428
          policy_loss: 0.020685123859180345
          total_loss: 0.012299342826008797
          vf_explained_var: -0.1497269719839096
          vf_loss: 2.1056123642893e-06
    num_agent_steps_sampled: 824000
    num_agent_steps_trained: 824000
    num_steps_sampled: 824000
    num_steps_trained: 824000
  iterations_since_restore: 824
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,824,20286.9,824000,0,0,0,347.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 825000
  custom_metrics: {}
  date: 2021-10-09_04-02-50
  done: false
  episode_len_mean: 350.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2288
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8333779003885058
          entropy_coeff: 0.009999999999999998
          kl: 0.013389347958904669
          policy_loss: -0.08551041322449843
          total_loss: -0.09240470561716292
          vf_explained_var: -0.46394047141075134
          vf_loss: 1.0126101550945856e-06
    num_agent_steps_sampled: 825000
    num_agent_steps_trained: 825000
    num_steps_sampled: 825000
    num_steps_trained: 825000
  iterations_since_restore: 825
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,825,20310.1,825000,0,0,0,350.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 826000
  custom_metrics: {}
  date: 2021-10-09_04-03-14
  done: false
  episode_len_mean: 348.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2291
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8759201831287808
          entropy_coeff: 0.009999999999999998
          kl: 0.012106134413302556
          policy_loss: -0.10375961330201891
          total_loss: -0.1121759522292349
          vf_explained_var: -0.6180185675621033
          vf_loss: 6.293010563417232e-07
    num_agent_steps_sampled: 826000
    num_agent_steps_trained: 826000
    num_steps_sampled: 826000
    num_steps_trained: 826000
  iterations_since_restore: 826
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,826,20333.3,826000,0,0,0,348.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 827000
  custom_metrics: {}
  date: 2021-10-09_04-03-39
  done: false
  episode_len_mean: 349.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2294
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9029453330569797
          entropy_coeff: 0.009999999999999998
          kl: 0.010537826909908512
          policy_loss: -0.14427082327504953
          total_loss: -0.15429734575251738
          vf_explained_var: -0.7697115540504456
          vf_loss: 4.992174006525197e-07
    num_agent_steps_sampled: 827000
    num_agent_steps_trained: 827000
    num_steps_sampled: 827000
    num_steps_trained: 827000
  iterations_since_restore: 827
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,827,20358.3,827000,0,0,0,349.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 828000
  custom_metrics: {}
  date: 2021-10-09_04-04-01
  done: false
  episode_len_mean: 350.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2296
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7876298467318217
          entropy_coeff: 0.009999999999999998
          kl: 0.011478407169311922
          policy_loss: -0.08472544074886375
          total_loss: -0.0927951053198841
          vf_explained_var: -0.8564693927764893
          vf_loss: 6.68020761749075e-07
    num_agent_steps_sampled: 828000
    num_agent_steps_trained: 828000
    num_steps_sampled: 828000
    num_steps_trained: 828000
  iterations_since_restore: 828
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,828,20380.5,828000,0,0,0,350.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 829000
  custom_metrics: {}
  date: 2021-10-09_04-04-24
  done: false
  episode_len_mean: 352.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2299
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7225866728358799
          entropy_coeff: 0.009999999999999998
          kl: 0.006084977824044084
          policy_loss: -0.026068275437379877
          total_loss: -0.03809486882140239
          vf_explained_var: -0.6220508813858032
          vf_loss: 8.962714711439427e-07
    num_agent_steps_sampled: 829000
    num_agent_steps_trained: 829000
    num_steps_sampled: 829000
    num_steps_trained: 829000
  iterations_since_restore: 829
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,829,20403.5,829000,0,0,0,352.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 830000
  custom_metrics: {}
  date: 2021-10-09_04-04-47
  done: false
  episode_len_mean: 353.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2302
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9246400038401286
          entropy_coeff: 0.009999999999999998
          kl: 0.011330701941964744
          policy_loss: -0.06107505659262339
          total_loss: -0.07064117857565483
          vf_explained_var: -0.9486523270606995
          vf_loss: 4.946564644114915e-07
    num_agent_steps_sampled: 830000
    num_agent_steps_trained: 830000
    num_steps_sampled: 830000
    num_steps_trained: 830000
  iterations_since_restore: 830
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,830,20426.8,830000,0,0,0,353.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 831000
  custom_metrics: {}
  date: 2021-10-09_04-05-10
  done: false
  episode_len_mean: 355.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2305
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.834207730823093
          entropy_coeff: 0.009999999999999998
          kl: 0.01151913715187228
          policy_loss: -0.06409662319347262
          total_loss: -0.07259737422896756
          vf_explained_var: -0.6285169720649719
          vf_loss: 5.633196195731216e-07
    num_agent_steps_sampled: 831000
    num_agent_steps_trained: 831000
    num_steps_sampled: 831000
    num_steps_trained: 831000
  iterations_since_restore: 831
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,831,20449.5,831000,0,0,0,355.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 832000
  custom_metrics: {}
  date: 2021-10-09_04-05-31
  done: false
  episode_len_mean: 358.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2307
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8969088660346136
          entropy_coeff: 0.009999999999999998
          kl: 0.011139143181166647
          policy_loss: -0.08193856411510043
          total_loss: -0.09137448500841856
          vf_explained_var: -1.0
          vf_loss: 1.7033340938926106e-05
    num_agent_steps_sampled: 832000
    num_agent_steps_trained: 832000
    num_steps_sampled: 832000
    num_steps_trained: 832000
  iterations_since_restore: 832
  node_ip: 192.168.3.5
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,832,20470.5,832000,0,0,0,358.42




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 833000
  custom_metrics: {}
  date: 2021-10-09_04-06-13
  done: false
  episode_len_mean: 359.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2310
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8145758655336168
          entropy_coeff: 0.009999999999999998
          kl: 0.013381283828668818
          policy_loss: -0.06323479960362116
          total_loss: -0.06993801179859373
          vf_explained_var: -0.06980796158313751
          vf_loss: 1.095736876221862e-05
    num_agent_steps_sampled: 833000
    num_agent_steps_trained: 833000
    num_steps_sampled: 833000
    num_steps_trained: 833000
  iterations_since_restore: 833
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,833,20512.6,833000,0,0,0,359.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 834000
  custom_metrics: {}
  date: 2021-10-09_04-06-33
  done: false
  episode_len_mean: 361.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2312
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5046108722686768
          entropy_coeff: 0.009999999999999998
          kl: 0.00960162352574295
          policy_loss: -0.05967307839956548
          total_loss: -0.06651432913624578
          vf_explained_var: 0.03360943868756294
          vf_loss: 2.2226544324944875e-06
    num_agent_steps_sampled: 834000
    num_agent_steps_trained: 834000
    num_steps_sampled: 834000
    num_steps_trained: 834000
  iterations_since_restore: 834
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,834,20532.9,834000,0,0,0,361.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 835000
  custom_metrics: {}
  date: 2021-10-09_04-06-58
  done: false
  episode_len_mean: 361.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2316
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6962253133455911
          entropy_coeff: 0.009999999999999998
          kl: 0.008186264539933399
          policy_loss: -0.04366075557967027
          total_loss: -0.05362838993055953
          vf_explained_var: -0.7241966128349304
          vf_loss: 1.1184775896961381e-06
    num_agent_steps_sampled: 835000
    num_agent_steps_trained: 835000
    num_steps_sampled: 835000
    num_steps_trained: 835000
  iterations_since_restore: 835
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,835,20557.8,835000,0,0,0,361.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 836000
  custom_metrics: {}
  date: 2021-10-09_04-07-20
  done: false
  episode_len_mean: 362.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2318
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5529561691813998
          entropy_coeff: 0.009999999999999998
          kl: 0.008948243200689158
          policy_loss: -0.037275628471333117
          total_loss: -0.04515978320915666
          vf_explained_var: -0.5261275172233582
          vf_loss: 9.510447869034679e-07
    num_agent_steps_sampled: 836000
    num_agent_steps_trained: 836000
    num_steps_sampled: 836000
    num_steps_trained: 836000
  iterations_since_restore: 836
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,836,20579,836000,0,0,0,362.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 837000
  custom_metrics: {}
  date: 2021-10-09_04-07-43
  done: false
  episode_len_mean: 363.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2321
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.670142118136088
          entropy_coeff: 0.009999999999999998
          kl: 0.013153690951689138
          policy_loss: -0.07602863179312812
          total_loss: -0.08149216928415828
          vf_explained_var: -0.42488470673561096
          vf_loss: 7.284178404158107e-07
    num_agent_steps_sampled: 837000
    num_agent_steps_trained: 837000
    num_steps_sampled: 837000
    num_steps_trained: 837000
  iterations_since_restore: 837
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,837,20602.5,837000,0,0,0,363.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 838000
  custom_metrics: {}
  date: 2021-10-09_04-08-04
  done: false
  episode_len_mean: 365.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2323
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5787444008721245
          entropy_coeff: 0.009999999999999998
          kl: 0.010068522545774873
          policy_loss: -0.049279003052247895
          total_loss: -0.056464208310676944
          vf_explained_var: -0.7528571486473083
          vf_loss: 7.31368437653954e-07
    num_agent_steps_sampled: 838000
    num_agent_steps_trained: 838000
    num_steps_sampled: 838000
    num_steps_trained: 838000
  iterations_since_restore: 838
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,838,20623.3,838000,0,0,0,365.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 839000
  custom_metrics: {}
  date: 2021-10-09_04-08-23
  done: false
  episode_len_mean: 365.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2326
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.583922599421607
          entropy_coeff: 0.009999999999999998
          kl: 0.011091658522379432
          policy_loss: 0.057736015237039987
          total_loss: 0.051373428313268556
          vf_explained_var: 0.09044620394706726
          vf_loss: 1.0710323917035365e-06
    num_agent_steps_sampled: 839000
    num_agent_steps_trained: 839000
    num_steps_sampled: 839000
    num_steps_trained: 839000
  iterations_since_restore: 839
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,839,20642.3,839000,0,0,0,365.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 840000
  custom_metrics: {}
  date: 2021-10-09_04-08-46
  done: false
  episode_len_mean: 366.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2329
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5673815462324354
          entropy_coeff: 0.009999999999999998
          kl: 0.009970645712648876
          policy_loss: -0.02638180156548818
          total_loss: -0.033536124146646924
          vf_explained_var: 0.019721824675798416
          vf_loss: 1.6036833225522666e-06
    num_agent_steps_sampled: 840000
    num_agent_steps_trained: 840000
    num_steps_sampled: 840000
    num_steps_trained: 840000
  iterations_since_restore: 840
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,840,20665.1,840000,0,0,0,366.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 841000
  custom_metrics: {}
  date: 2021-10-09_04-09-10
  done: false
  episode_len_mean: 366.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2331
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.68680473168691
          entropy_coeff: 0.009999999999999998
          kl: 0.009443833551714074
          policy_loss: -0.05089424536046055
          total_loss: -0.05969200630982717
          vf_explained_var: -0.4802011251449585
          vf_loss: 2.4491239792546874e-06
    num_agent_steps_sampled: 841000
    num_agent_steps_trained: 841000
    num_steps_sampled: 841000
    num_steps_trained: 841000
  iterations_since_restore: 841
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,841,20688.9,841000,0,0,0,366.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 842000
  custom_metrics: {}
  date: 2021-10-09_04-09-34
  done: false
  episode_len_mean: 367.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2334
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7080013301637438
          entropy_coeff: 0.009999999999999998
          kl: 0.011470903311893387
          policy_loss: -0.09693390991952684
          total_loss: -0.1042105860180325
          vf_explained_var: -0.5827880501747131
          vf_loss: 3.7786390284585043e-06
    num_agent_steps_sampled: 842000
    num_agent_steps_trained: 842000
    num_steps_sampled: 842000
    num_steps_trained: 842000
  iterations_since_restore: 842
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,842,20713.4,842000,0,0,0,367.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 843000
  custom_metrics: {}
  date: 2021-10-09_04-09-56
  done: false
  episode_len_mean: 367.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2337
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7593968550364176
          entropy_coeff: 0.009999999999999998
          kl: 0.010756446821604686
          policy_loss: -0.03071873943424887
          total_loss: -0.03911704269962178
          vf_explained_var: -0.5996754765510559
          vf_loss: 6.465357736828992e-06
    num_agent_steps_sampled: 843000
    num_agent_steps_trained: 843000
    num_steps_sampled: 843000
    num_steps_trained: 843000
  iterations_since_restore: 843
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,843,20735.2,843000,0,0,0,367.18




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 844000
  custom_metrics: {}
  date: 2021-10-09_04-10-36
  done: false
  episode_len_mean: 367.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2340
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7719473772578769
          entropy_coeff: 0.009999999999999998
          kl: 0.0110741122315908
          policy_loss: -0.06552807338949707
          total_loss: -0.07378522054188781
          vf_explained_var: -0.3232581913471222
          vf_loss: 1.7474880615332545e-06
    num_agent_steps_sampled: 844000
    num_agent_steps_trained: 844000
    num_steps_sampled: 844000
    num_steps_trained: 844000
  iterations_since_restore: 844
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,844,20774.8,844000,0,0,0,367.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 845000
  custom_metrics: {}
  date: 2021-10-09_04-11-01
  done: false
  episode_len_mean: 368.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2343
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7479543526967367
          entropy_coeff: 0.009999999999999998
          kl: 0.011596815862950373
          policy_loss: -0.09942088454133935
          total_loss: -0.1069162084410588
          vf_explained_var: -0.875787079334259
          vf_loss: 7.709484572413001e-05
    num_agent_steps_sampled: 845000
    num_agent_steps_trained: 845000
    num_steps_sampled: 845000
    num_steps_trained: 845000
  iterations_since_restore: 845
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,845,20799.8,845000,0,0,0,368.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 846000
  custom_metrics: {}
  date: 2021-10-09_04-11-24
  done: false
  episode_len_mean: 370.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2345
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8649703370200263
          entropy_coeff: 0.009999999999999998
          kl: 0.010542067207088281
          policy_loss: -0.10387527189320989
          total_loss: -0.11351724826834268
          vf_explained_var: -0.2305021435022354
          vf_loss: 1.6724472929910109e-06
    num_agent_steps_sampled: 846000
    num_agent_steps_trained: 846000
    num_steps_sampled: 846000
    num_steps_trained: 846000
  iterations_since_restore: 846
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,846,20822.9,846000,0,0,0,370.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 847000
  custom_metrics: {}
  date: 2021-10-09_04-11-48
  done: false
  episode_len_mean: 370.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2348
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8245514353116354
          entropy_coeff: 0.009999999999999998
          kl: 0.008985252175411082
          policy_loss: -0.10441584388415019
          total_loss: -0.11498451286719905
          vf_explained_var: 0.016704199835658073
          vf_loss: 7.739637583073192e-07
    num_agent_steps_sampled: 847000
    num_agent_steps_trained: 847000
    num_steps_sampled: 847000
    num_steps_trained: 847000
  iterations_since_restore: 847
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,847,20847.1,847000,0,0,0,370.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 848000
  custom_metrics: {}
  date: 2021-10-09_04-12-11
  done: false
  episode_len_mean: 371.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2351
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8024541642930774
          entropy_coeff: 0.009999999999999998
          kl: 0.01483097881530371
          policy_loss: -0.08094808810080091
          total_loss: -0.08630123974548445
          vf_explained_var: -0.3893146216869354
          vf_loss: 1.331886268000441e-06
    num_agent_steps_sampled: 848000
    num_agent_steps_trained: 848000
    num_steps_sampled: 848000
    num_steps_trained: 848000
  iterations_since_restore: 848
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,848,20869.9,848000,0,0,0,371.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 849000
  custom_metrics: {}
  date: 2021-10-09_04-12-32
  done: false
  episode_len_mean: 372.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2354
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8101429144541423
          entropy_coeff: 0.009999999999999998
          kl: 0.012458494350971221
          policy_loss: -0.06259482743011581
          total_loss: -0.0700515719751517
          vf_explained_var: -0.4146062731742859
          vf_loss: 1.4333281828587335e-06
    num_agent_steps_sampled: 849000
    num_agent_steps_trained: 849000
    num_steps_sampled: 849000
    num_steps_trained: 849000
  iterations_since_restore: 849
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,849,20891.3,849000,0,0,0,372.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 850000
  custom_metrics: {}
  date: 2021-10-09_04-12-54
  done: false
  episode_len_mean: 369.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2356
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7875621186362372
          entropy_coeff: 0.009999999999999998
          kl: 0.007243483963617257
          policy_loss: -0.007387275455726518
          total_loss: -0.019074076145059533
          vf_explained_var: -0.4185153543949127
          vf_loss: 7.35615505062823e-07
    num_agent_steps_sampled: 850000
    num_agent_steps_trained: 850000
    num_steps_sampled: 850000
    num_steps_trained: 850000
  iterations_since_restore: 850
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,850,20913.4,850000,0,0,0,369.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 851000
  custom_metrics: {}
  date: 2021-10-09_04-13-16
  done: false
  episode_len_mean: 368.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2359
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8463034709294637
          entropy_coeff: 0.009999999999999998
          kl: 0.011114466871463098
          policy_loss: -0.06616610817404257
          total_loss: -0.07513356057720053
          vf_explained_var: -0.9210076332092285
          vf_loss: 5.317920391058175e-07
    num_agent_steps_sampled: 851000
    num_agent_steps_trained: 851000
    num_steps_sampled: 851000
    num_steps_trained: 851000
  iterations_since_restore: 851
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,851,20934.8,851000,0,0,0,368.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 852000
  custom_metrics: {}
  date: 2021-10-09_04-13-34
  done: false
  episode_len_mean: 370.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2361
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.2485247550739182
          entropy_coeff: 0.009999999999999998
          kl: 0.0074995885882079465
          policy_loss: 0.00376517739560869
          total_loss: -0.0009191731611887614
          vf_explained_var: -0.7403315305709839
          vf_loss: 0.0013940218313040179
    num_agent_steps_sampled: 852000
    num_agent_steps_trained: 852000
    num_steps_sampled: 852000
    num_steps_trained: 852000
  iterations_since_restore: 852
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,852,20953.5,852000,0,0,0,370.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 853000
  custom_metrics: {}
  date: 2021-10-09_04-13-55
  done: false
  episode_len_mean: 371.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2363
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.733992212348514
          entropy_coeff: 0.009999999999999998
          kl: 0.009932387307707497
          policy_loss: -0.09007473285827372
          total_loss: -0.09892845865752962
          vf_explained_var: -0.07626689970493317
          vf_loss: 9.881780967033491e-07
    num_agent_steps_sampled: 853000
    num_agent_steps_trained: 853000
    num_steps_sampled: 853000
    num_steps_trained: 853000
  iterations_since_restore: 853
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,853,20973.7,853000,0,0,0,371.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 854000
  custom_metrics: {}
  date: 2021-10-09_04-14-16
  done: false
  episode_len_mean: 372.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2366
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7343293733066982
          entropy_coeff: 0.009999999999999998
          kl: 0.009650044643719803
          policy_loss: -0.08074649764845769
          total_loss: -0.08984463430113263
          vf_explained_var: -0.9511098861694336
          vf_loss: 1.1523651563847025e-06
    num_agent_steps_sampled: 854000
    num_agent_steps_trained: 854000
    num_steps_sampled: 854000
    num_steps_trained: 854000
  iterations_since_restore: 854
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,854,20995.2,854000,0,0,0,372.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 855000
  custom_metrics: {}
  date: 2021-10-09_04-14-39
  done: false
  episode_len_mean: 373.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2369
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9264723870489333
          entropy_coeff: 0.009999999999999998
          kl: 0.009822178260027235
          policy_loss: -0.062672616665562
          total_loss: -0.07354570250544283
          vf_explained_var: -0.9830992817878723
          vf_loss: 5.843516845516206e-07
    num_agent_steps_sampled: 855000
    num_agent_steps_trained: 855000
    num_steps_sampled: 855000
    num_steps_trained: 855000
  iterations_since_restore: 855
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,855,21018.3,855000,0,0,0,373.55




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 856000
  custom_metrics: {}
  date: 2021-10-09_04-15-20
  done: false
  episode_len_mean: 373.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2372
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9832842840088738
          entropy_coeff: 0.009999999999999998
          kl: 0.008016436603266995
          policy_loss: -0.033202025584048694
          total_loss: -0.046185793499979706
          vf_explained_var: -1.0
          vf_loss: 6.585853040203599e-07
    num_agent_steps_sampled: 856000
    num_agent_steps_trained: 856000
    num_steps_sampled: 856000
    num_steps_trained: 856000
  iterations_since_restore: 856
  node_ip: 192.168.3.5
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,856,21058.7,856000,0,0,0,373.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 857000
  custom_metrics: {}
  date: 2021-10-09_04-15-45
  done: false
  episode_len_mean: 371.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2375
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7100357744428847
          entropy_coeff: 0.009999999999999998
          kl: 0.01108464107487293
          policy_loss: -0.037808317049509946
          total_loss: -0.0454383906080491
          vf_explained_var: -0.4334171414375305
          vf_loss: 7.104813489730885e-07
    num_agent_steps_sampled: 857000
    num_agent_steps_trained: 857000
    num_steps_sampled: 857000
    num_steps_trained: 857000
  iterations_since_restore: 857
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,857,21083.5,857000,0,0,0,371.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 858000
  custom_metrics: {}
  date: 2021-10-09_04-16-04
  done: false
  episode_len_mean: 371.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2377
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.839874177508884
          entropy_coeff: 0.009999999999999998
          kl: 0.009217993823199934
          policy_loss: -0.10167407464856902
          total_loss: -0.11219627118359009
          vf_explained_var: -0.6190451979637146
          vf_loss: 1.6389822014338683e-06
    num_agent_steps_sampled: 858000
    num_agent_steps_trained: 858000
    num_steps_sampled: 858000
    num_steps_trained: 858000
  iterations_since_restore: 858
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,858,21103,858000,0,0,0,371.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 859000
  custom_metrics: {}
  date: 2021-10-09_04-16-25
  done: false
  episode_len_mean: 372.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2379
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.021397129032347
          entropy_coeff: 0.009999999999999998
          kl: 0.011944054310077137
          policy_loss: -0.12146399716536203
          total_loss: -0.12147323153913021
          vf_explained_var: -0.020008547231554985
          vf_loss: 9.67882881594859e-07
    num_agent_steps_sampled: 859000
    num_agent_steps_trained: 859000
    num_steps_sampled: 859000
    num_steps_trained: 859000
  iterations_since_restore: 859
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,859,21123.6,859000,0,0,0,372.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 860000
  custom_metrics: {}
  date: 2021-10-09_04-16-45
  done: false
  episode_len_mean: 373.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2382
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.959486260679033
          entropy_coeff: 0.009999999999999998
          kl: 0.00857307148917052
          policy_loss: -0.06497712164289421
          total_loss: -0.07724706975536214
          vf_explained_var: -0.8786482810974121
          vf_loss: 9.672790604832294e-07
    num_agent_steps_sampled: 860000
    num_agent_steps_trained: 860000
    num_steps_sampled: 860000
    num_steps_trained: 860000
  iterations_since_restore: 860
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,860,21144.3,860000,0,0,0,373.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 861000
  custom_metrics: {}
  date: 2021-10-09_04-17-07
  done: false
  episode_len_mean: 374.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2384
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.81479967435201
          entropy_coeff: 0.009999999999999998
          kl: 0.010033208106181609
          policy_loss: -0.11044577453285456
          total_loss: -0.1200217570281691
          vf_explained_var: -0.9732636213302612
          vf_loss: 6.773215848928279e-07
    num_agent_steps_sampled: 861000
    num_agent_steps_trained: 861000
    num_steps_sampled: 861000
    num_steps_trained: 861000
  iterations_since_restore: 861
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,861,21166.1,861000,0,0,0,374.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 862000
  custom_metrics: {}
  date: 2021-10-09_04-17-31
  done: false
  episode_len_mean: 372.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2387
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8407987859514026
          entropy_coeff: 0.009999999999999998
          kl: 0.012293816440110353
          policy_loss: -0.0760840814974573
          total_loss: -0.08398845984290043
          vf_explained_var: -0.8579551577568054
          vf_loss: 1.0390013623388464e-06
    num_agent_steps_sampled: 862000
    num_agent_steps_trained: 862000
    num_steps_sampled: 862000
    num_steps_trained: 862000
  iterations_since_restore: 862
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,862,21190.2,862000,0,0,0,372.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 863000
  custom_metrics: {}
  date: 2021-10-09_04-17-55
  done: false
  episode_len_mean: 372.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2390
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6012472245428298
          entropy_coeff: 0.009999999999999998
          kl: 0.009068706014025313
          policy_loss: -0.09056382643886739
          total_loss: -0.09882826368427938
          vf_explained_var: -0.765653133392334
          vf_loss: 6.686987379427794e-07
    num_agent_steps_sampled: 863000
    num_agent_steps_trained: 863000
    num_steps_sampled: 863000
    num_steps_trained: 863000
  iterations_since_restore: 863
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,863,21213.7,863000,0,0,0,372.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 864000
  custom_metrics: {}
  date: 2021-10-09_04-18-18
  done: false
  episode_len_mean: 372.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2393
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6281576017538706
          entropy_coeff: 0.009999999999999998
          kl: 0.009332862820133651
          policy_loss: -0.07281765826046467
          total_loss: -0.08112565025997659
          vf_explained_var: -0.41638025641441345
          vf_loss: 5.489895310499075e-07
    num_agent_steps_sampled: 864000
    num_agent_steps_trained: 864000
    num_steps_sampled: 864000
    num_steps_trained: 864000
  iterations_since_restore: 864
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,864,21237.1,864000,0,0,0,372.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 865000
  custom_metrics: {}
  date: 2021-10-09_04-18-43
  done: false
  episode_len_mean: 371.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2396
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6951252394252354
          entropy_coeff: 0.009999999999999998
          kl: 0.011280256776587984
          policy_loss: -0.1039084350483285
          total_loss: -0.11122174246443642
          vf_explained_var: -0.4066695272922516
          vf_loss: 1.2582326617247316e-06
    num_agent_steps_sampled: 865000
    num_agent_steps_trained: 865000
    num_steps_sampled: 865000
    num_steps_trained: 865000
  iterations_since_restore: 865
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,865,21261.5,865000,0,0,0,371.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 866000
  custom_metrics: {}
  date: 2021-10-09_04-19-06
  done: false
  episode_len_mean: 370.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2399
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9160921123292711
          entropy_coeff: 0.009999999999999998
          kl: 0.013283367196858235
          policy_loss: -0.03983252820455366
          total_loss: -0.047644371394481924
          vf_explained_var: -0.5900617837905884
          vf_loss: 1.1407812305858694e-06
    num_agent_steps_sampled: 866000
    num_agent_steps_trained: 866000
    num_steps_sampled: 866000
    num_steps_trained: 866000
  iterations_since_restore: 866
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,866,21284.8,866000,0,0,0,370.83




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 867000
  custom_metrics: {}
  date: 2021-10-09_04-19-47
  done: false
  episode_len_mean: 370.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2402
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5293772472275629
          entropy_coeff: 0.009999999999999998
          kl: 0.00931815971301711
          policy_loss: -0.10415933219095071
          total_loss: -0.11148835644125939
          vf_explained_var: -0.38809677958488464
          vf_loss: 4.273986540207867e-06
    num_agent_steps_sampled: 867000
    num_agent_steps_trained: 867000
    num_steps_sampled: 867000
    num_steps_trained: 867000
  iterations_since_restore: 867
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,867,21325.6,867000,0,0,0,370.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 868000
  custom_metrics: {}
  date: 2021-10-09_04-20-08
  done: false
  episode_len_mean: 370.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2404
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8814626759952968
          entropy_coeff: 0.009999999999999998
          kl: 0.012498966801017423
          policy_loss: -0.11362940101987785
          total_loss: -0.12176519061128298
          vf_explained_var: -0.7901811599731445
          vf_loss: 1.0086118932248508e-06
    num_agent_steps_sampled: 868000
    num_agent_steps_trained: 868000
    num_steps_sampled: 868000
    num_steps_trained: 868000
  iterations_since_restore: 868
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,868,21346.3,868000,0,0,0,370.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 869000
  custom_metrics: {}
  date: 2021-10-09_04-20-35
  done: false
  episode_len_mean: 367.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2408
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.57422307199902
          entropy_coeff: 0.009999999999999998
          kl: 0.0073731540975815095
          policy_loss: 0.006449990129719178
          total_loss: -0.002992984838783741
          vf_explained_var: -0.07647424936294556
          vf_loss: 3.9111811831718215e-07
    num_agent_steps_sampled: 869000
    num_agent_steps_trained: 869000
    num_steps_sampled: 869000
    num_steps_trained: 869000
  iterations_since_restore: 869
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,869,21373.5,869000,0,0,0,367.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 870000
  custom_metrics: {}
  date: 2021-10-09_04-20-58
  done: false
  episode_len_mean: 367.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2411
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6543585870001052
          entropy_coeff: 0.009999999999999998
          kl: 0.012571263180591015
          policy_loss: -0.07926000476711326
          total_loss: -0.08506337905095683
          vf_explained_var: -0.0670839175581932
          vf_loss: 6.212144635734754e-07
    num_agent_steps_sampled: 870000
    num_agent_steps_trained: 870000
    num_steps_sampled: 870000
    num_steps_trained: 870000
  iterations_since_restore: 870
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,870,21396.8,870000,0,0,0,367.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 871000
  custom_metrics: {}
  date: 2021-10-09_04-21-19
  done: false
  episode_len_mean: 367.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2413
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7920946174197727
          entropy_coeff: 0.009999999999999998
          kl: 0.008147794351402225
          policy_loss: -0.12034467293156517
          total_loss: -0.1313043411821127
          vf_explained_var: -0.6622134447097778
          vf_loss: 6.402112227811409e-07
    num_agent_steps_sampled: 871000
    num_agent_steps_trained: 871000
    num_steps_sampled: 871000
    num_steps_trained: 871000
  iterations_since_restore: 871
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,871,21418.2,871000,0,0,0,367.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 872000
  custom_metrics: {}
  date: 2021-10-09_04-21-46
  done: false
  episode_len_mean: 367.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2416
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4586672027905783
          entropy_coeff: 0.009999999999999998
          kl: 0.01833530193492672
          policy_loss: -0.12502822894603013
          total_loss: -0.12394988642384609
          vf_explained_var: 0.09618605673313141
          vf_loss: 1.2282439267134275e-06
    num_agent_steps_sampled: 872000
    num_agent_steps_trained: 872000
    num_steps_sampled: 872000
    num_steps_trained: 872000
  iterations_since_restore: 872
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,872,21444.7,872000,0,0,0,367.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 873000
  custom_metrics: {}
  date: 2021-10-09_04-22-07
  done: false
  episode_len_mean: 366.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2419
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7168469468752543
          entropy_coeff: 0.009999999999999998
          kl: 0.009034053806506629
          policy_loss: -0.09666227652794784
          total_loss: -0.10611243357674943
          vf_explained_var: -0.46445706486701965
          vf_loss: 5.481694692043169e-07
    num_agent_steps_sampled: 873000
    num_agent_steps_trained: 873000
    num_steps_sampled: 873000
    num_steps_trained: 873000
  iterations_since_restore: 873
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,873,21466.1,873000,0,0,0,366.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 874000
  custom_metrics: {}
  date: 2021-10-09_04-22-31
  done: false
  episode_len_mean: 365.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2422
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6690895557403564
          entropy_coeff: 0.009999999999999998
          kl: 0.015526191407284312
          policy_loss: -0.10828111896084415
          total_loss: -0.11170645513468319
          vf_explained_var: -0.24574296176433563
          vf_loss: 1.586719441269856e-06
    num_agent_steps_sampled: 874000
    num_agent_steps_trained: 874000
    num_steps_sampled: 874000
    num_steps_trained: 874000
  iterations_since_restore: 874
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,874,21490,874000,0,0,0,365.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 875000
  custom_metrics: {}
  date: 2021-10-09_04-22-51
  done: false
  episode_len_mean: 366.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2424
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.692473989062839
          entropy_coeff: 0.009999999999999998
          kl: 0.014193705128029914
          policy_loss: -0.07546135393074817
          total_loss: -0.08025875899733768
          vf_explained_var: -0.06159619614481926
          vf_loss: 1.6939117497511486e-06
    num_agent_steps_sampled: 875000
    num_agent_steps_trained: 875000
    num_steps_sampled: 875000
    num_steps_trained: 875000
  iterations_since_restore: 875
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,875,21509.2,875000,0,0,0,366.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 876000
  custom_metrics: {}
  date: 2021-10-09_04-23-11
  done: false
  episode_len_mean: 366.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2427
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.50607250796424
          entropy_coeff: 0.009999999999999998
          kl: 0.011176871576885933
          policy_loss: -0.06302304758379856
          total_loss: -0.06853384930226537
          vf_explained_var: -0.5386916399002075
          vf_loss: 1.5571911294248114e-06
    num_agent_steps_sampled: 876000
    num_agent_steps_trained: 876000
    num_steps_sampled: 876000
    num_steps_trained: 876000
  iterations_since_restore: 876
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,876,21529.5,876000,0,0,0,366.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 877000
  custom_metrics: {}
  date: 2021-10-09_04-23-32
  done: false
  episode_len_mean: 367.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2429
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7158919705284967
          entropy_coeff: 0.009999999999999998
          kl: 0.01048258946688851
          policy_loss: -0.08409424672524134
          total_loss: -0.09229692202061415
          vf_explained_var: -0.3938113749027252
          vf_loss: 1.0029873026143025e-06
    num_agent_steps_sampled: 877000
    num_agent_steps_trained: 877000
    num_steps_sampled: 877000
    num_steps_trained: 877000
  iterations_since_restore: 877
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,877,21550.6,877000,0,0,0,367.4




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 878000
  custom_metrics: {}
  date: 2021-10-09_04-24-10
  done: false
  episode_len_mean: 368.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2432
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8120540618896483
          entropy_coeff: 0.009999999999999998
          kl: 0.0108273656629142
          policy_loss: -0.03608169315589799
          total_loss: -0.04495125648876031
          vf_explained_var: -0.8045651912689209
          vf_loss: 1.1926269773236224e-06
    num_agent_steps_sampled: 878000
    num_agent_steps_trained: 878000
    num_steps_sampled: 878000
    num_steps_trained: 878000
  iterations_since_restore: 878
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,878,21588.6,878000,0,0,0,368.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 879000
  custom_metrics: {}
  date: 2021-10-09_04-24-31
  done: false
  episode_len_mean: 370.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2434
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8741997692320083
          entropy_coeff: 0.009999999999999998
          kl: 0.011922742128554731
          policy_loss: -0.040575541721449955
          total_loss: -0.04913147389888763
          vf_explained_var: -0.05627808719873428
          vf_loss: 5.049611303320489e-07
    num_agent_steps_sampled: 879000
    num_agent_steps_trained: 879000
    num_steps_sampled: 879000
    num_steps_trained: 879000
  iterations_since_restore: 879
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,879,21609.2,879000,0,0,0,370.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 880000
  custom_metrics: {}
  date: 2021-10-09_04-24-54
  done: false
  episode_len_mean: 369.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2437
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7535988516277736
          entropy_coeff: 0.009999999999999998
          kl: 0.007974821926629695
          policy_loss: -0.031046989063421886
          total_loss: -0.04176975405878491
          vf_explained_var: -0.3942123353481293
          vf_loss: 3.601297531001061e-07
    num_agent_steps_sampled: 880000
    num_agent_steps_trained: 880000
    num_steps_sampled: 880000
    num_steps_trained: 880000
  iterations_since_restore: 880
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,880,21633,880000,0,0,0,369.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 881000
  custom_metrics: {}
  date: 2021-10-09_04-25-19
  done: false
  episode_len_mean: 370.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2440
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.943184455235799
          entropy_coeff: 0.009999999999999998
          kl: 0.013901770254317363
          policy_loss: 0.007651916725767983
          total_loss: 9.712042907873789e-05
          vf_explained_var: -0.44011223316192627
          vf_loss: 8.077283414170071e-07
    num_agent_steps_sampled: 881000
    num_agent_steps_trained: 881000
    num_steps_sampled: 881000
    num_steps_trained: 881000
  iterations_since_restore: 881
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,881,21657.3,881000,0,0,0,370.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 882000
  custom_metrics: {}
  date: 2021-10-09_04-25-39
  done: false
  episode_len_mean: 371.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2442
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7808966477711996
          entropy_coeff: 0.009999999999999998
          kl: 0.009581868586789099
          policy_loss: -0.06510687201387352
          total_loss: -0.07472945319281685
          vf_explained_var: -0.43254750967025757
          vf_loss: 6.241715952784135e-07
    num_agent_steps_sampled: 882000
    num_agent_steps_trained: 882000
    num_steps_sampled: 882000
    num_steps_trained: 882000
  iterations_since_restore: 882
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,882,21677.1,882000,0,0,0,371.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 883000
  custom_metrics: {}
  date: 2021-10-09_04-26-03
  done: false
  episode_len_mean: 370.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2445
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.4317004958788553
          entropy_coeff: 0.009999999999999998
          kl: 0.012885659883667067
          policy_loss: -0.11890268507930968
          total_loss: -0.12221073293023639
          vf_explained_var: -0.2818206250667572
          vf_loss: 7.775760268360601e-07
    num_agent_steps_sampled: 883000
    num_agent_steps_trained: 883000
    num_steps_sampled: 883000
    num_steps_trained: 883000
  iterations_since_restore: 883
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,883,21701.8,883000,0,0,0,370.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 884000
  custom_metrics: {}
  date: 2021-10-09_04-26-26
  done: false
  episode_len_mean: 371.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2448
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8950724138153925
          entropy_coeff: 0.009999999999999998
          kl: 0.013301425207483755
          policy_loss: -0.05489941857134303
          total_loss: -0.06248338735765881
          vf_explained_var: -0.8378802537918091
          vf_loss: 3.3872858788678665e-06
    num_agent_steps_sampled: 884000
    num_agent_steps_trained: 884000
    num_steps_sampled: 884000
    num_steps_trained: 884000
  iterations_since_restore: 884
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,884,21724,884000,0,0,0,371.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 885000
  custom_metrics: {}
  date: 2021-10-09_04-26-49
  done: false
  episode_len_mean: 371.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2451
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7271405352486504
          entropy_coeff: 0.009999999999999998
          kl: 0.01072608029772864
          policy_loss: -0.06172001274923484
          total_loss: -0.06982655198209815
          vf_explained_var: -0.44985195994377136
          vf_loss: 1.608677632387096e-06
    num_agent_steps_sampled: 885000
    num_agent_steps_trained: 885000
    num_steps_sampled: 885000
    num_steps_trained: 885000
  iterations_since_restore: 885
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,885,21747.6,885000,0,0,0,371.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 886000
  custom_metrics: {}
  date: 2021-10-09_04-27-12
  done: false
  episode_len_mean: 370.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2454
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.786946419874827
          entropy_coeff: 0.009999999999999998
          kl: 0.007431055120102848
          policy_loss: -0.013491795356902812
          total_loss: -0.025011353691418967
          vf_explained_var: 0.030939361080527306
          vf_loss: 1.5809844100544979e-06
    num_agent_steps_sampled: 886000
    num_agent_steps_trained: 886000
    num_steps_sampled: 886000
    num_steps_trained: 886000
  iterations_since_restore: 886
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,886,21770.7,886000,0,0,0,370.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 887000
  custom_metrics: {}
  date: 2021-10-09_04-27-38
  done: false
  episode_len_mean: 368.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2457
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9766449186537
          entropy_coeff: 0.009999999999999998
          kl: 0.01263572520610627
          policy_loss: -0.05816108220153385
          total_loss: -0.0671315544595321
          vf_explained_var: -0.6560759544372559
          vf_loss: 1.3145072267030223e-06
    num_agent_steps_sampled: 887000
    num_agent_steps_trained: 887000
    num_steps_sampled: 887000
    num_steps_trained: 887000
  iterations_since_restore: 887
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,887,21796,887000,0,0,0,368.64




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 888000
  custom_metrics: {}
  date: 2021-10-09_04-28-20
  done: false
  episode_len_mean: 366.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2460
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9021567384401956
          entropy_coeff: 0.009999999999999998
          kl: 0.01343481094018415
          policy_loss: -0.11461867139571243
          total_loss: -0.12215765176547898
          vf_explained_var: -0.8613156080245972
          vf_loss: 5.269869225483287e-06
    num_agent_steps_sampled: 888000
    num_agent_steps_trained: 888000
    num_steps_sampled: 888000
    num_steps_trained: 888000
  iterations_since_restore: 888
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,888,21838.5,888000,0,0,0,366.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 889000
  custom_metrics: {}
  date: 2021-10-09_04-28-44
  done: false
  episode_len_mean: 363.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2463
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5371874425146315
          entropy_coeff: 0.009999999999999998
          kl: 0.01166771242352415
          policy_loss: -0.04205895095235772
          total_loss: -0.04729757755994797
          vf_explained_var: -0.9805654883384705
          vf_loss: 0.00016555907700169174
    num_agent_steps_sampled: 889000
    num_agent_steps_trained: 889000
    num_steps_sampled: 889000
    num_steps_trained: 889000
  iterations_since_restore: 889
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,889,21862.2,889000,0,0,0,363.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 890000
  custom_metrics: {}
  date: 2021-10-09_04-29-10
  done: false
  episode_len_mean: 361.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2466
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6637719498740302
          entropy_coeff: 0.009999999999999998
          kl: 0.010595498582923055
          policy_loss: -0.07933467775583267
          total_loss: -0.08690228921671708
          vf_explained_var: -0.5016575455665588
          vf_loss: 1.840811500465457e-05
    num_agent_steps_sampled: 890000
    num_agent_steps_trained: 890000
    num_steps_sampled: 890000
    num_steps_trained: 890000
  iterations_since_restore: 890
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,890,21887.9,890000,0,0,0,361.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 891000
  custom_metrics: {}
  date: 2021-10-09_04-29-36
  done: false
  episode_len_mean: 359.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2469
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.831145269340939
          entropy_coeff: 0.009999999999999998
          kl: 0.01007132613887628
          policy_loss: -0.1919686344348722
          total_loss: -0.20167358964681625
          vf_explained_var: -0.07337649166584015
          vf_loss: 2.5965116757712774e-06
    num_agent_steps_sampled: 891000
    num_agent_steps_trained: 891000
    num_steps_sampled: 891000
    num_steps_trained: 891000
  iterations_since_restore: 891
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,891,21914.1,891000,0,0,0,359.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 892000
  custom_metrics: {}
  date: 2021-10-09_04-30-01
  done: false
  episode_len_mean: 358.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2472
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5777181638611688
          entropy_coeff: 0.009999999999999998
          kl: 0.013192171148805167
          policy_loss: -0.05816564783453941
          total_loss: -0.06267176551951302
          vf_explained_var: 0.08742188662290573
          vf_loss: 1.036317123571785e-06
    num_agent_steps_sampled: 892000
    num_agent_steps_trained: 892000
    num_steps_sampled: 892000
    num_steps_trained: 892000
  iterations_since_restore: 892
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,892,21939.5,892000,0,0,0,358.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 893000
  custom_metrics: {}
  date: 2021-10-09_04-30-27
  done: false
  episode_len_mean: 357.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2475
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6243463344044156
          entropy_coeff: 0.009999999999999998
          kl: 0.012120056000639718
          policy_loss: -0.10297984460161792
          total_loss: -0.10886711834205522
          vf_explained_var: 0.644018292427063
          vf_loss: 2.064122442549079e-06
    num_agent_steps_sampled: 893000
    num_agent_steps_trained: 893000
    num_steps_sampled: 893000
    num_steps_trained: 893000
  iterations_since_restore: 893
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,893,21965.8,893000,0,0,0,357.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 894000
  custom_metrics: {}
  date: 2021-10-09_04-30-49
  done: false
  episode_len_mean: 354.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2478
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6844542940457663
          entropy_coeff: 0.009999999999999998
          kl: 0.008758964668584845
          policy_loss: -0.08316066517598099
          total_loss: -0.09250037061671416
          vf_explained_var: -0.21816228330135345
          vf_loss: 2.2085866828231498e-05
    num_agent_steps_sampled: 894000
    num_agent_steps_trained: 894000
    num_steps_sampled: 894000
    num_steps_trained: 894000
  iterations_since_restore: 894
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,894,21987,894000,0,0,0,354.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 895000
  custom_metrics: {}
  date: 2021-10-09_04-31-13
  done: false
  episode_len_mean: 353.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2481
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7778590811623467
          entropy_coeff: 0.009999999999999998
          kl: 0.019603183013996873
          policy_loss: -0.12333038720405765
          total_loss: -0.12435331789569723
          vf_explained_var: -0.3635648190975189
          vf_loss: 8.718980175596396e-06
    num_agent_steps_sampled: 895000
    num_agent_steps_trained: 895000
    num_steps_sampled: 895000
    num_steps_trained: 895000
  iterations_since_restore: 895
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,895,22011.3,895000,0,0,0,353.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 896000
  custom_metrics: {}
  date: 2021-10-09_04-31-34
  done: false
  episode_len_mean: 353.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2484
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7031751685672336
          entropy_coeff: 0.009999999999999998
          kl: 0.011961046719355453
          policy_loss: -0.07036278638988733
          total_loss: -0.07717430063833793
          vf_explained_var: -0.3203914761543274
          vf_loss: 1.951796442906723e-06
    num_agent_steps_sampled: 896000
    num_agent_steps_trained: 896000
    num_steps_sampled: 896000
    num_steps_trained: 896000
  iterations_since_restore: 896
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,896,22032.6,896000,0,0,0,353.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 897000
  custom_metrics: {}
  date: 2021-10-09_04-31-55
  done: false
  episode_len_mean: 354.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2486
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.772653713491228
          entropy_coeff: 0.009999999999999998
          kl: 0.012084072672662089
          policy_loss: -0.07419676321248213
          total_loss: -0.08159758581055535
          vf_explained_var: -0.13553088903427124
          vf_loss: 2.32907925502938e-06
    num_agent_steps_sampled: 897000
    num_agent_steps_trained: 897000
    num_steps_sampled: 897000
    num_steps_trained: 897000
  iterations_since_restore: 897
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,897,22053.3,897000,0,0,0,354.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 898000
  custom_metrics: {}
  date: 2021-10-09_04-32-17
  done: false
  episode_len_mean: 355.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2489
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8123585687743293
          entropy_coeff: 0.009999999999999998
          kl: 0.011355358040099805
          policy_loss: -0.08253027995427449
          total_loss: -0.09095242015189595
          vf_explained_var: -0.24066486954689026
          vf_loss: 5.984474858185725e-07
    num_agent_steps_sampled: 898000
    num_agent_steps_trained: 898000
    num_steps_sampled: 898000
    num_steps_trained: 898000
  iterations_since_restore: 898
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,898,22075.3,898000,0,0,0,355.22




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 899000
  custom_metrics: {}
  date: 2021-10-09_04-32-59
  done: false
  episode_len_mean: 353.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2492
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.601142152150472
          entropy_coeff: 0.009999999999999998
          kl: 0.010055078113965325
          policy_loss: -0.06083378133674463
          total_loss: -0.06825392920937803
          vf_explained_var: 0.118069127202034
          vf_loss: 1.2482494030210648e-06
    num_agent_steps_sampled: 899000
    num_agent_steps_trained: 899000
    num_steps_sampled: 899000
    num_steps_trained: 899000
  iterations_since_restore: 899
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,899,22117.1,899000,0,0,0,353.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 900000
  custom_metrics: {}
  date: 2021-10-09_04-33-21
  done: false
  episode_len_mean: 354.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2495
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7289226399527655
          entropy_coeff: 0.009999999999999998
          kl: 0.008962018182696718
          policy_loss: -0.11446012780070305
          total_loss: -0.12409246588746707
          vf_explained_var: 0.4177352786064148
          vf_loss: 6.649550619562837e-07
    num_agent_steps_sampled: 900000
    num_agent_steps_trained: 900000
    num_steps_sampled: 900000
    num_steps_trained: 900000
  iterations_since_restore: 900
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,900,22139.4,900000,0,0,0,354.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 901000
  custom_metrics: {}
  date: 2021-10-09_04-33-46
  done: false
  episode_len_mean: 354.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2498
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7879758887820774
          entropy_coeff: 0.009999999999999998
          kl: 0.008865495838302727
          policy_loss: -0.08919427181697553
          total_loss: -0.09949981715116236
          vf_explained_var: -0.44101426005363464
          vf_loss: 4.486723837102444e-07
    num_agent_steps_sampled: 901000
    num_agent_steps_trained: 901000
    num_steps_sampled: 901000
    num_steps_trained: 901000
  iterations_since_restore: 901
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,901,22164,901000,0,0,0,354.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 902000
  custom_metrics: {}
  date: 2021-10-09_04-34-10
  done: false
  episode_len_mean: 353.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2501
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.661694300174713
          entropy_coeff: 0.009999999999999998
          kl: 0.009668187590137102
          policy_loss: -0.0126923149658574
          total_loss: -0.021049273014068604
          vf_explained_var: -0.2904419004917145
          vf_loss: 4.844127346359528e-07
    num_agent_steps_sampled: 902000
    num_agent_steps_trained: 902000
    num_steps_sampled: 902000
    num_steps_trained: 902000
  iterations_since_restore: 902
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,902,22188.1,902000,0,0,0,353.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 903000
  custom_metrics: {}
  date: 2021-10-09_04-34-33
  done: false
  episode_len_mean: 352.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2504
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7506907290882534
          entropy_coeff: 0.009999999999999998
          kl: 0.01253484341012377
          policy_loss: 0.005288741323682997
          total_loss: -0.001508843588332335
          vf_explained_var: -0.6722710728645325
          vf_loss: 8.457313220510008e-07
    num_agent_steps_sampled: 903000
    num_agent_steps_trained: 903000
    num_steps_sampled: 903000
    num_steps_trained: 903000
  iterations_since_restore: 903
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,903,22211,903000,0,0,0,352.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 904000
  custom_metrics: {}
  date: 2021-10-09_04-34-55
  done: false
  episode_len_mean: 354.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2506
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7746262775527106
          entropy_coeff: 0.009999999999999998
          kl: 0.011313757731427801
          policy_loss: -0.05851631582611137
          total_loss: -0.06659662193722195
          vf_explained_var: -0.9018077254295349
          vf_loss: 6.518657668961067e-07
    num_agent_steps_sampled: 904000
    num_agent_steps_trained: 904000
    num_steps_sampled: 904000
    num_steps_trained: 904000
  iterations_since_restore: 904
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,904,22232.6,904000,0,0,0,354.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 905000
  custom_metrics: {}
  date: 2021-10-09_04-35-18
  done: false
  episode_len_mean: 354.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2509
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7200613458951315
          entropy_coeff: 0.009999999999999998
          kl: 0.012612251914276286
          policy_loss: -0.03232639955563678
          total_loss: -0.03875186275690794
          vf_explained_var: -0.4960952699184418
          vf_loss: 5.433361206680982e-07
    num_agent_steps_sampled: 905000
    num_agent_steps_trained: 905000
    num_steps_sampled: 905000
    num_steps_trained: 905000
  iterations_since_restore: 905
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,905,22255.6,905000,0,0,0,354.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 906000
  custom_metrics: {}
  date: 2021-10-09_04-35-39
  done: false
  episode_len_mean: 354.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2512
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.66963226530287
          entropy_coeff: 0.009999999999999998
          kl: 0.012920408091656082
          policy_loss: -0.05332053241630395
          total_loss: -0.05897729504439566
          vf_explained_var: 0.05649886280298233
          vf_loss: 1.699415079454209e-06
    num_agent_steps_sampled: 906000
    num_agent_steps_trained: 906000
    num_steps_sampled: 906000
    num_steps_trained: 906000
  iterations_since_restore: 906
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,906,22277.5,906000,0,0,0,354.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 907000
  custom_metrics: {}
  date: 2021-10-09_04-36-01
  done: false
  episode_len_mean: 355.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2514
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8729970773061118
          entropy_coeff: 0.009999999999999998
          kl: 0.012346556517682572
          policy_loss: -0.1455300681706932
          total_loss: -0.15371140754885143
          vf_explained_var: 0.01503833569586277
          vf_loss: 1.0073350791925704e-06
    num_agent_steps_sampled: 907000
    num_agent_steps_trained: 907000
    num_steps_sampled: 907000
    num_steps_trained: 907000
  iterations_since_restore: 907
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,907,22298.9,907000,0,0,0,355.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 908000
  custom_metrics: {}
  date: 2021-10-09_04-36-23
  done: false
  episode_len_mean: 357.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2517
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.819983140627543
          entropy_coeff: 0.009999999999999998
          kl: 0.0163188029767858
          policy_loss: -0.09397605539609989
          total_loss: -0.09821686819195748
          vf_explained_var: -0.9658123254776001
          vf_loss: 1.791604193310074e-05
    num_agent_steps_sampled: 908000
    num_agent_steps_trained: 908000
    num_steps_sampled: 908000
    num_steps_trained: 908000
  iterations_since_restore: 908
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,908,22320.7,908000,0,0,0,357.58




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 909000
  custom_metrics: {}
  date: 2021-10-09_04-37-02
  done: false
  episode_len_mean: 356.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2520
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.619986931482951
          entropy_coeff: 0.009999999999999998
          kl: 0.012269813016631678
          policy_loss: -0.12145616970956326
          total_loss: -0.12714880742132664
          vf_explained_var: -0.8693482875823975
          vf_loss: 2.5167693307063623e-05
    num_agent_steps_sampled: 909000
    num_agent_steps_trained: 909000
    num_steps_sampled: 909000
    num_steps_trained: 909000
  iterations_since_restore: 909
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,909,22359.8,909000,0,0,0,356.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 910000
  custom_metrics: {}
  date: 2021-10-09_04-37-25
  done: false
  episode_len_mean: 358.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2522
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6606905553076001
          entropy_coeff: 0.009999999999999998
          kl: 0.012423373678549786
          policy_loss: -0.13541557929582065
          total_loss: -0.14140706008507145
          vf_explained_var: -0.14448541402816772
          vf_loss: 2.18060579426391e-06
    num_agent_steps_sampled: 910000
    num_agent_steps_trained: 910000
    num_steps_sampled: 910000
    num_steps_trained: 910000
  iterations_since_restore: 910
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,910,22382.7,910000,0,0,0,358.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 911000
  custom_metrics: {}
  date: 2021-10-09_04-37-47
  done: false
  episode_len_mean: 356.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2525
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6867415097024705
          entropy_coeff: 0.009999999999999998
          kl: 0.01236416009028388
          policy_loss: -0.09620510281787978
          total_loss: -0.10250906323393186
          vf_explained_var: 0.09812550991773605
          vf_loss: 7.90474593751848e-07
    num_agent_steps_sampled: 911000
    num_agent_steps_trained: 911000
    num_steps_sampled: 911000
    num_steps_trained: 911000
  iterations_since_restore: 911
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,911,22405.1,911000,0,0,0,356.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 912000
  custom_metrics: {}
  date: 2021-10-09_04-38-06
  done: false
  episode_len_mean: 355.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2527
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5320953839355045
          entropy_coeff: 0.009999999999999998
          kl: 0.00022081607252495418
          policy_loss: -0.0005061520004851951
          total_loss: -0.015033321123984126
          vf_explained_var: -0.868646502494812
          vf_loss: 0.0006051419048213171
    num_agent_steps_sampled: 912000
    num_agent_steps_trained: 912000
    num_steps_sampled: 912000
    num_steps_trained: 912000
  iterations_since_restore: 912
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,912,22424,912000,0,0,0,355.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 913000
  custom_metrics: {}
  date: 2021-10-09_04-38-28
  done: false
  episode_len_mean: 356.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2530
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.7475869377454123
          entropy_coeff: 0.009999999999999998
          kl: 0.013071753076447144
          policy_loss: -0.0566890873428848
          total_loss: -0.06858006297714181
          vf_explained_var: -0.6589968800544739
          vf_loss: 1.3147312494589238e-06
    num_agent_steps_sampled: 913000
    num_agent_steps_trained: 913000
    num_steps_sampled: 913000
    num_steps_trained: 913000
  iterations_since_restore: 913
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,913,22446.3,913000,0,0,0,356.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 914000
  custom_metrics: {}
  date: 2021-10-09_04-38-52
  done: false
  episode_len_mean: 354.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2533
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8075745582580567
          entropy_coeff: 0.009999999999999998
          kl: 0.01463224646191566
          policy_loss: -0.10864842084960805
          total_loss: -0.12047353662136528
          vf_explained_var: -0.5052502155303955
          vf_loss: 4.891882010977295e-07
    num_agent_steps_sampled: 914000
    num_agent_steps_trained: 914000
    num_steps_sampled: 914000
    num_steps_trained: 914000
  iterations_since_restore: 914
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,914,22470,914000,0,0,0,354.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 915000
  custom_metrics: {}
  date: 2021-10-09_04-39-14
  done: false
  episode_len_mean: 354.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2536
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.79732852379481
          entropy_coeff: 0.009999999999999998
          kl: 0.014960357991306437
          policy_loss: -0.09647616782329149
          total_loss: -0.1080577903530664
          vf_explained_var: -0.6443654894828796
          vf_loss: 1.367865135016978e-06
    num_agent_steps_sampled: 915000
    num_agent_steps_trained: 915000
    num_steps_sampled: 915000
    num_steps_trained: 915000
  iterations_since_restore: 915
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,915,22492.3,915000,0,0,0,354.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 916000
  custom_metrics: {}
  date: 2021-10-09_04-39-39
  done: false
  episode_len_mean: 353.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2539
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.7210642629199557
          entropy_coeff: 0.009999999999999998
          kl: 0.014859764541392116
          policy_loss: 0.013533587857253022
          total_loss: 0.0026708824767006766
          vf_explained_var: -0.5446962118148804
          vf_loss: 6.110255419066056e-07
    num_agent_steps_sampled: 916000
    num_agent_steps_trained: 916000
    num_steps_sampled: 916000
    num_steps_trained: 916000
  iterations_since_restore: 916
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,916,22517.3,916000,0,0,0,353.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 917000
  custom_metrics: {}
  date: 2021-10-09_04-39-59
  done: false
  episode_len_mean: 354.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2541
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.7704416738616096
          entropy_coeff: 0.009999999999999998
          kl: 0.01631246207471872
          policy_loss: -0.09162019685738616
          total_loss: -0.102356146938271
          vf_explained_var: -1.0
          vf_loss: 6.237543472151528e-07
    num_agent_steps_sampled: 917000
    num_agent_steps_trained: 917000
    num_steps_sampled: 917000
    num_steps_trained: 917000
  iterations_since_restore: 917
  node_ip: 192.168.3.5
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,917,22536.7,917000,0,0,0,354.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 918000
  custom_metrics: {}
  date: 2021-10-09_04-40-22
  done: false
  episode_len_mean: 353.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2544
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8372075981563991
          entropy_coeff: 0.009999999999999998
          kl: 0.009852211547773097
          policy_loss: -0.08354596991298927
          total_loss: -0.09770920885105928
          vf_explained_var: -0.3041367530822754
          vf_loss: 4.809813304973432e-07
    num_agent_steps_sampled: 918000
    num_agent_steps_trained: 918000
    num_steps_sampled: 918000
    num_steps_trained: 918000
  iterations_since_restore: 918
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,918,22560.2,918000,0,0,0,353.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 919000
  custom_metrics: {}
  date: 2021-10-09_04-40-46
  done: false
  episode_len_mean: 353.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2547
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.5706590745184157
          entropy_coeff: 0.009999999999999998
          kl: 0.01099824849378997
          policy_loss: 0.011294805506865183
          total_loss: 0.13668228917651706
          vf_explained_var: -0.5318747758865356
          vf_loss: 0.13639619030808617
    num_agent_steps_sampled: 919000
    num_agent_steps_trained: 919000
    num_steps_sampled: 919000
    num_steps_trained: 919000
  iterations_since_restore: 919
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,919,22583.9,919000,-0.07,0,-7,353.88




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 920000
  custom_metrics: {}
  date: 2021-10-09_04-41-27
  done: false
  episode_len_mean: 352.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2550
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.6429361091719734
          entropy_coeff: 0.009999999999999998
          kl: 0.022195283616417524
          policy_loss: -0.08667405858221981
          total_loss: -0.08277295052798259
          vf_explained_var: -0.5487026572227478
          vf_loss: 0.010849788003704614
    num_agent_steps_sampled: 920000
    num_agent_steps_trained: 920000
    num_steps_sampled: 920000
    num_steps_trained: 920000
  iterations_since_restore: 920
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,920,22624.3,920000,-0.07,0,-7,352.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 921000
  custom_metrics: {}
  date: 2021-10-09_04-41-48
  done: false
  episode_len_mean: 353.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2553
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6904261469841004
          entropy_coeff: 0.009999999999999998
          kl: 0.016854309605939713
          policy_loss: -0.04880575148595704
          total_loss: -0.048710039878884955
          vf_explained_var: -0.02611829712986946
          vf_loss: 0.006201033780558242
    num_agent_steps_sampled: 921000
    num_agent_steps_trained: 921000
    num_steps_sampled: 921000
    num_steps_trained: 921000
  iterations_since_restore: 921
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,921,22646.2,921000,-0.07,0,-7,353.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 922000
  custom_metrics: {}
  date: 2021-10-09_04-42-10
  done: false
  episode_len_mean: 353.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 2555
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.916624014907413
          entropy_coeff: 0.009999999999999998
          kl: 0.014300012807224451
          policy_loss: -0.13672948082288106
          total_loss: -0.14262097022599643
          vf_explained_var: -0.11334455013275146
          vf_loss: 0.004112410315105485
    num_agent_steps_sampled: 922000
    num_agent_steps_trained: 922000
    num_steps_sampled: 922000
    num_steps_trained: 922000
  iterations_since_restore: 922
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,922,22668.2,922000,-0.07,0,-7,353.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 923000
  custom_metrics: {}
  date: 2021-10-09_04-42-33
  done: false
  episode_len_mean: 355.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2558
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8783721235063342
          entropy_coeff: 0.009999999999999998
          kl: 0.01387917060298818
          policy_loss: -0.09531063652700848
          total_loss: -0.10310838715069824
          vf_explained_var: -0.36070796847343445
          vf_loss: 0.002093269835071017
    num_agent_steps_sampled: 923000
    num_agent_steps_trained: 923000
    num_steps_sampled: 923000
    num_steps_trained: 923000
  iterations_since_restore: 923
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,923,22690.7,923000,-0.07,0,-7,355.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 924000
  custom_metrics: {}
  date: 2021-10-09_04-42-55
  done: false
  episode_len_mean: 356.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2561
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8841125157144334
          entropy_coeff: 0.009999999999999998
          kl: 0.011477777270528063
          policy_loss: -0.04199593429350191
          total_loss: -0.05170860702378882
          vf_explained_var: -1.0
          vf_loss: 0.0017743791084891805
    num_agent_steps_sampled: 924000
    num_agent_steps_trained: 924000
    num_steps_sampled: 924000
    num_steps_trained: 924000
  iterations_since_restore: 924
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,924,22713.1,924000,-0.07,0,-7,356.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 925000
  custom_metrics: {}
  date: 2021-10-09_04-43-18
  done: false
  episode_len_mean: 357.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 2563
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7925744626257154
          entropy_coeff: 0.009999999999999998
          kl: 0.01454413542386277
          policy_loss: -0.12003698880887693
          total_loss: -0.12655150855167044
          vf_explained_var: -0.5351535081863403
          vf_loss: 0.0020924679837965717
    num_agent_steps_sampled: 925000
    num_agent_steps_trained: 925000
    num_steps_sampled: 925000
    num_steps_trained: 925000
  iterations_since_restore: 925
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,925,22735.7,925000,-0.07,0,-7,357.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 926000
  custom_metrics: {}
  date: 2021-10-09_04-43-37
  done: false
  episode_len_mean: 360.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 2565
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.567709732055664
          entropy_coeff: 0.009999999999999998
          kl: 0.015023456881329741
          policy_loss: -0.1458544538770285
          total_loss: -0.14862914110223452
          vf_explained_var: -0.3053717613220215
          vf_loss: 0.003276541904132399
    num_agent_steps_sampled: 926000
    num_agent_steps_trained: 926000
    num_steps_sampled: 926000
    num_steps_trained: 926000
  iterations_since_restore: 926
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,926,22754.8,926000,-0.07,0,-7,360.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 927000
  custom_metrics: {}
  date: 2021-10-09_04-43-59
  done: false
  episode_len_mean: 362.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2568
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8658468617333306
          entropy_coeff: 0.009999999999999998
          kl: 0.014075120293534724
          policy_loss: -0.13840736639168527
          total_loss: -0.14690474081370566
          vf_explained_var: -0.5001084804534912
          vf_loss: 0.0011428457306465133
    num_agent_steps_sampled: 927000
    num_agent_steps_trained: 927000
    num_steps_sampled: 927000
    num_steps_trained: 927000
  iterations_since_restore: 927
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,927,22777.1,927000,-0.07,0,-7,362.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 928000
  custom_metrics: {}
  date: 2021-10-09_04-44-23
  done: false
  episode_len_mean: 362.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2571
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7439676165580749
          entropy_coeff: 0.009999999999999998
          kl: 0.013294202489938595
          policy_loss: -0.07355676715572675
          total_loss: -0.08127129649122557
          vf_explained_var: -0.01684565469622612
          vf_loss: 0.0012072494198542296
    num_agent_steps_sampled: 928000
    num_agent_steps_trained: 928000
    num_steps_sampled: 928000
    num_steps_trained: 928000
  iterations_since_restore: 928
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,928,22800.4,928000,-0.07,0,-7,362.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 929000
  custom_metrics: {}
  date: 2021-10-09_04-44-45
  done: false
  episode_len_mean: 365.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2574
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8383778174718222
          entropy_coeff: 0.009999999999999998
          kl: 0.01605033652820305
          policy_loss: -0.08220908572483394
          total_loss: -0.08951072970198261
          vf_explained_var: -0.448576956987381
          vf_loss: 0.0007983219269792446
    num_agent_steps_sampled: 929000
    num_agent_steps_trained: 929000
    num_steps_sampled: 929000
    num_steps_trained: 929000
  iterations_since_restore: 929
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,929,22822.4,929000,-0.07,0,-7,365.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 930000
  custom_metrics: {}
  date: 2021-10-09_04-45-08
  done: false
  episode_len_mean: 366.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2577
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7987793034977384
          entropy_coeff: 0.009999999999999998
          kl: 0.014163508809255947
          policy_loss: -0.06417137670020262
          total_loss: -0.07253191127545304
          vf_explained_var: -0.25439420342445374
          vf_loss: 0.000552378444343857
    num_agent_steps_sampled: 930000
    num_agent_steps_trained: 930000
    num_steps_sampled: 930000
    num_steps_trained: 930000
  iterations_since_restore: 930
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,930,22845.6,930000,-0.07,0,-7,366.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 931000
  custom_metrics: {}
  date: 2021-10-09_04-45-29
  done: false
  episode_len_mean: 366.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 2579
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8895142449273004
          entropy_coeff: 0.009999999999999998
          kl: 0.013514141268696071
          policy_loss: -0.056252439889229004
          total_loss: -0.06590628107595775
          vf_explained_var: -0.9462615251541138
          vf_loss: 0.0005824843592967631
    num_agent_steps_sampled: 931000
    num_agent_steps_trained: 931000
    num_steps_sampled: 931000
    num_steps_trained: 931000
  iterations_since_restore: 931
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,931,22866.4,931000,-0.07,0,-7,366.32




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 932000
  custom_metrics: {}
  date: 2021-10-09_04-46-09
  done: false
  episode_len_mean: 366.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2582
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9318480239974127
          entropy_coeff: 0.009999999999999998
          kl: 0.013643844701225226
          policy_loss: -0.06323887550582488
          total_loss: -0.07343075349926949
          vf_explained_var: -1.0
          vf_loss: 0.00038468210437309205
    num_agent_steps_sampled: 932000
    num_agent_steps_trained: 932000
    num_steps_sampled: 932000
    num_steps_trained: 932000
  iterations_since_restore: 932
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,932,22906.6,932000,-0.07,0,-7,366.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 933000
  custom_metrics: {}
  date: 2021-10-09_04-46-33
  done: false
  episode_len_mean: 365.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2585
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7968187994427152
          entropy_coeff: 0.009999999999999998
          kl: 0.010567960228470152
          policy_loss: -0.06134501316895088
          total_loss: -0.07220425634748406
          vf_explained_var: -0.09429942071437836
          vf_loss: 0.0003378131502864158
    num_agent_steps_sampled: 933000
    num_agent_steps_trained: 933000
    num_steps_sampled: 933000
    num_steps_trained: 933000
  iterations_since_restore: 933
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,933,22930.4,933000,-0.07,0,-7,365.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 934000
  custom_metrics: {}
  date: 2021-10-09_04-46-54
  done: false
  episode_len_mean: 365.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 2587
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8751005835003323
          entropy_coeff: 0.009999999999999998
          kl: 0.014297954135071301
          policy_loss: -0.09120672231333124
          total_loss: -0.10013399831950665
          vf_explained_var: -0.6818377375602722
          vf_loss: 0.0006627062474662024
    num_agent_steps_sampled: 934000
    num_agent_steps_trained: 934000
    num_steps_sampled: 934000
    num_steps_trained: 934000
  iterations_since_restore: 934
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,934,22951.3,934000,-0.07,0,-7,365.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 935000
  custom_metrics: {}
  date: 2021-10-09_04-47-16
  done: false
  episode_len_mean: 366.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2590
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.0149856752819484
          entropy_coeff: 0.009999999999999998
          kl: 0.015169657714314595
          policy_loss: -0.14077084350089233
          total_loss: -0.15086242059866586
          vf_explained_var: -0.6344688534736633
          vf_loss: 0.00033873256632230346
    num_agent_steps_sampled: 935000
    num_agent_steps_trained: 935000
    num_steps_sampled: 935000
    num_steps_trained: 935000
  iterations_since_restore: 935
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,935,22973.6,935000,-0.07,0,-7,366.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 936000
  custom_metrics: {}
  date: 2021-10-09_04-47-38
  done: false
  episode_len_mean: 367.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 2592
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8902777327431572
          entropy_coeff: 0.009999999999999998
          kl: 0.014162065148488641
          policy_loss: -0.06267827252546947
          total_loss: -0.07224497555030716
          vf_explained_var: -0.7754518389701843
          vf_loss: 0.0002621163935044832
    num_agent_steps_sampled: 936000
    num_agent_steps_trained: 936000
    num_steps_sampled: 936000
    num_steps_trained: 936000
  iterations_since_restore: 936
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,936,22995.9,936000,-0.07,0,-7,367.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 937000
  custom_metrics: {}
  date: 2021-10-09_04-48-00
  done: false
  episode_len_mean: 368.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2595
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.874773253334893
          entropy_coeff: 0.009999999999999998
          kl: 0.017265800157230476
          policy_loss: -0.11034623069895638
          total_loss: -0.11776583960486783
          vf_explained_var: -0.947019636631012
          vf_loss: 0.00026553578031275214
    num_agent_steps_sampled: 937000
    num_agent_steps_trained: 937000
    num_steps_sampled: 937000
    num_steps_trained: 937000
  iterations_since_restore: 937
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,937,23017.3,937000,-0.07,0,-7,368.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 938000
  custom_metrics: {}
  date: 2021-10-09_04-48-22
  done: false
  episode_len_mean: 369.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2598
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8736201365788778
          entropy_coeff: 0.009999999999999998
          kl: 0.01359948759563415
          policy_loss: -0.09930263830141889
          total_loss: -0.10914924759417773
          vf_explained_var: -0.9948514103889465
          vf_loss: 0.00017608941586028475
    num_agent_steps_sampled: 938000
    num_agent_steps_trained: 938000
    num_steps_sampled: 938000
    num_steps_trained: 938000
  iterations_since_restore: 938
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,938,23039.7,938000,-0.07,0,-7,369.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 939000
  custom_metrics: {}
  date: 2021-10-09_04-48-46
  done: false
  episode_len_mean: 370.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2601
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.821941371758779
          entropy_coeff: 0.009999999999999998
          kl: 0.016402438436986404
          policy_loss: -0.11386977632840474
          total_loss: -0.12138413278799919
          vf_explained_var: -0.2930007576942444
          vf_loss: 0.00019564241605015317
    num_agent_steps_sampled: 939000
    num_agent_steps_trained: 939000
    num_steps_sampled: 939000
    num_steps_trained: 939000
  iterations_since_restore: 939
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,939,23062.9,939000,-0.07,0,-7,370.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 940000
  custom_metrics: {}
  date: 2021-10-09_04-49-09
  done: false
  episode_len_mean: 370.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2604
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8739745537439982
          entropy_coeff: 0.009999999999999998
          kl: 0.014139524629683011
          policy_loss: -0.03902867140455379
          total_loss: -0.048536306247115135
          vf_explained_var: -0.38647934794425964
          vf_loss: 0.00017259772891217533
    num_agent_steps_sampled: 940000
    num_agent_steps_trained: 940000
    num_steps_sampled: 940000
    num_steps_trained: 940000
  iterations_since_restore: 940
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,940,23085.9,940000,-0.07,0,-7,370.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 941000
  custom_metrics: {}
  date: 2021-10-09_04-49-31
  done: false
  episode_len_mean: 369.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 2606
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9217857652240329
          entropy_coeff: 0.009999999999999998
          kl: 0.012485012594098044
          policy_loss: -0.08361837719049718
          total_loss: -0.09469167575654057
          vf_explained_var: -0.8962185978889465
          vf_loss: 0.0001451266454953131
    num_agent_steps_sampled: 941000
    num_agent_steps_trained: 941000
    num_steps_sampled: 941000
    num_steps_trained: 941000
  iterations_since_restore: 941
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,941,23108.1,941000,-0.07,0,-7,369.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 942000
  custom_metrics: {}
  date: 2021-10-09_04-49-53
  done: false
  episode_len_mean: 370.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2609
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.872504136297438
          entropy_coeff: 0.009999999999999998
          kl: 0.015240847090648737
          policy_loss: -0.07493990113337835
          total_loss: -0.08374541583988401
          vf_explained_var: -0.7308335900306702
          vf_loss: 0.00015437358682296082
    num_agent_steps_sampled: 942000
    num_agent_steps_trained: 942000
    num_steps_sampled: 942000
    num_steps_trained: 942000
  iterations_since_restore: 942
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,942,23130.3,942000,-0.07,0,-7,370.91




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 943000
  custom_metrics: {}
  date: 2021-10-09_04-50-36
  done: false
  episode_len_mean: 369.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2612
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.821350724167294
          entropy_coeff: 0.009999999999999998
          kl: 0.01808377014686376
          policy_loss: -0.0817211871035397
          total_loss: -0.08822150516013304
          vf_explained_var: -0.33702099323272705
          vf_loss: 0.00012650619731478704
    num_agent_steps_sampled: 943000
    num_agent_steps_trained: 943000
    num_steps_sampled: 943000
    num_steps_trained: 943000
  iterations_since_restore: 943
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,943,23172.9,943000,-0.07,0,-7,369.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 944000
  custom_metrics: {}
  date: 2021-10-09_04-51-02
  done: false
  episode_len_mean: 367.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2615
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8751978423860338
          entropy_coeff: 0.009999999999999998
          kl: 0.01519829370857061
          policy_loss: -0.1180493804729647
          total_loss: -0.1269896623575025
          vf_explained_var: -0.602850079536438
          vf_loss: 7.380509673061573e-05
    num_agent_steps_sampled: 944000
    num_agent_steps_trained: 944000
    num_steps_sampled: 944000
    num_steps_trained: 944000
  iterations_since_restore: 944
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,944,23198.9,944000,-0.07,0,-7,367.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 945000
  custom_metrics: {}
  date: 2021-10-09_04-51-23
  done: false
  episode_len_mean: 366.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2618
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8182286739349365
          entropy_coeff: 0.009999999999999998
          kl: 0.014439493308263746
          policy_loss: -0.10725927427411079
          total_loss: -0.11611085728638702
          vf_explained_var: -0.8279612064361572
          vf_loss: 7.899412707451524e-05
    num_agent_steps_sampled: 945000
    num_agent_steps_trained: 945000
    num_steps_sampled: 945000
    num_steps_trained: 945000
  iterations_since_restore: 945
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,945,23220.7,945000,-0.07,0,-7,366.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 946000
  custom_metrics: {}
  date: 2021-10-09_04-51-50
  done: false
  episode_len_mean: 365.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2621
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.682758969730801
          entropy_coeff: 0.009999999999999998
          kl: 0.012480524732935233
          policy_loss: -0.10124860006488032
          total_loss: -0.10999622069713143
          vf_explained_var: -0.42654550075531006
          vf_loss: 8.34157875134325e-05
    num_agent_steps_sampled: 946000
    num_agent_steps_trained: 946000
    num_steps_sampled: 946000
    num_steps_trained: 946000
  iterations_since_restore: 946
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,946,23246.9,946000,-0.07,0,-7,365.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 947000
  custom_metrics: {}
  date: 2021-10-09_04-52-15
  done: false
  episode_len_mean: 363.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2624
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7695422940784031
          entropy_coeff: 0.009999999999999998
          kl: 0.015613318511477578
          policy_loss: -0.06848094449895951
          total_loss: -0.07611075940852365
          vf_explained_var: -0.4310099184513092
          vf_loss: 6.180061947007845e-05
    num_agent_steps_sampled: 947000
    num_agent_steps_trained: 947000
    num_steps_sampled: 947000
    num_steps_trained: 947000
  iterations_since_restore: 947
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,947,23272.5,947000,-0.07,0,-7,363.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 948000
  custom_metrics: {}
  date: 2021-10-09_04-52-40
  done: false
  episode_len_mean: 362.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2627
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8336363712946573
          entropy_coeff: 0.009999999999999998
          kl: 0.013710379084777319
          policy_loss: -0.08393118352525764
          total_loss: -0.09342860794729657
          vf_explained_var: -0.42462536692619324
          vf_loss: 5.438870806149983e-05
    num_agent_steps_sampled: 948000
    num_agent_steps_trained: 948000
    num_steps_sampled: 948000
    num_steps_trained: 948000
  iterations_since_restore: 948
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,948,23297,948000,-0.07,0,-7,362.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 949000
  custom_metrics: {}
  date: 2021-10-09_04-53-06
  done: false
  episode_len_mean: 359.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2630
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9783898499276904
          entropy_coeff: 0.009999999999999998
          kl: 0.014699851787337043
          policy_loss: -0.05125735894673401
          total_loss: -0.061579191436370216
          vf_explained_var: -0.5168194770812988
          vf_loss: 4.353797222494096e-05
    num_agent_steps_sampled: 949000
    num_agent_steps_trained: 949000
    num_steps_sampled: 949000
    num_steps_trained: 949000
  iterations_since_restore: 949
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,949,23323.2,949000,-0.07,0,-7,359.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 950000
  custom_metrics: {}
  date: 2021-10-09_04-53-30
  done: false
  episode_len_mean: 358.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2633
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8075161973635356
          entropy_coeff: 0.009999999999999998
          kl: 0.011897567120060239
          policy_loss: -0.1127203622005052
          total_loss: -0.12311354908678267
          vf_explained_var: -0.5296413898468018
          vf_loss: 5.893516667533226e-05
    num_agent_steps_sampled: 950000
    num_agent_steps_trained: 950000
    num_steps_sampled: 950000
    num_steps_trained: 950000
  iterations_since_restore: 950
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,950,23347.3,950000,-0.07,0,-7,358.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 951000
  custom_metrics: {}
  date: 2021-10-09_04-53-55
  done: false
  episode_len_mean: 358.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2636
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.811069252755907
          entropy_coeff: 0.009999999999999998
          kl: 0.014966004841530909
          policy_loss: -0.06043544900086191
          total_loss: -0.06888372702524066
          vf_explained_var: -0.08821336179971695
          vf_loss: 7.335834759740263e-05
    num_agent_steps_sampled: 951000
    num_agent_steps_trained: 951000
    num_steps_sampled: 951000
    num_steps_trained: 951000
  iterations_since_restore: 951
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,951,23372.3,951000,-0.07,0,-7,358.66




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 952000
  custom_metrics: {}
  date: 2021-10-09_04-54-37
  done: false
  episode_len_mean: 356.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 2640
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8360737178060744
          entropy_coeff: 0.009999999999999998
          kl: 0.013727150912719841
          policy_loss: -0.01459728790861037
          total_loss: -0.024125262422280178
          vf_explained_var: -0.6179487705230713
          vf_loss: 3.746393610223701e-05
    num_agent_steps_sampled: 952000
    num_agent_steps_trained: 952000
    num_steps_sampled: 952000
    num_steps_trained: 952000
  iterations_since_restore: 952
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,952,23413.6,952000,-0.07,0,-7,356.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 953000
  custom_metrics: {}
  date: 2021-10-09_04-55-02
  done: false
  episode_len_mean: 355.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2643
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9602102319399515
          entropy_coeff: 0.009999999999999998
          kl: 0.012978250179763073
          policy_loss: -0.04030253924429417
          total_loss: -0.051560767533050646
          vf_explained_var: -0.4908405840396881
          vf_loss: 2.8413728776083897e-05
    num_agent_steps_sampled: 953000
    num_agent_steps_trained: 953000
    num_steps_sampled: 953000
    num_steps_trained: 953000
  iterations_since_restore: 953
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,953,23439.5,953000,-0.07,0,-7,355.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 954000
  custom_metrics: {}
  date: 2021-10-09_04-55-28
  done: false
  episode_len_mean: 355.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 2646
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8090720494588217
          entropy_coeff: 0.009999999999999998
          kl: 0.01060102964251644
          policy_loss: -0.08453587277068032
          total_loss: -0.09581716913315985
          vf_explained_var: -0.5228052139282227
          vf_loss: 1.7103521642841063e-05
    num_agent_steps_sampled: 954000
    num_agent_steps_trained: 954000
    num_steps_sampled: 954000
    num_steps_trained: 954000
  iterations_since_restore: 954
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,954,23464.7,954000,-0.07,0,-7,355.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 955000
  custom_metrics: {}
  date: 2021-10-09_04-55-53
  done: false
  episode_len_mean: 353.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2649
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8968892137209574
          entropy_coeff: 0.009999999999999998
          kl: 0.013223169059989538
          policy_loss: -0.1311392562256919
          total_loss: -0.14161720749818615
          vf_explained_var: -0.8971911668777466
          vf_loss: 1.855684727666408e-05
    num_agent_steps_sampled: 955000
    num_agent_steps_trained: 955000
    num_steps_sampled: 955000
    num_steps_trained: 955000
  iterations_since_restore: 955
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,955,23490.4,955000,0,0,0,353.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 956000
  custom_metrics: {}
  date: 2021-10-09_04-56-17
  done: false
  episode_len_mean: 352.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2652
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9363596465852526
          entropy_coeff: 0.009999999999999998
          kl: 0.015310014522996834
          policy_loss: -0.0786951336181826
          total_loss: -0.08821627448002498
          vf_explained_var: -0.2868012487888336
          vf_loss: 3.2980888691882785e-05
    num_agent_steps_sampled: 956000
    num_agent_steps_trained: 956000
    num_steps_sampled: 956000
    num_steps_trained: 956000
  iterations_since_restore: 956
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,956,23514.5,956000,0,0,0,352.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 957000
  custom_metrics: {}
  date: 2021-10-09_04-56-45
  done: false
  episode_len_mean: 350.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2655
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6973508662647672
          entropy_coeff: 0.009999999999999998
          kl: 0.016035732325139346
          policy_loss: -0.059346076452897654
          total_loss: -0.0660221748571429
          vf_explained_var: -0.2560184597969055
          vf_loss: 2.295221728571859e-05
    num_agent_steps_sampled: 957000
    num_agent_steps_trained: 957000
    num_steps_sampled: 957000
    num_steps_trained: 957000
  iterations_since_restore: 957
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,957,23542,957000,0,0,0,350.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 958000
  custom_metrics: {}
  date: 2021-10-09_04-57-13
  done: false
  episode_len_mean: 347.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2659
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.0313418904940286
          entropy_coeff: 0.009999999999999998
          kl: 0.013271141642274782
          policy_loss: -0.08175213448703289
          total_loss: -0.09354991652071476
          vf_explained_var: -0.2915552854537964
          vf_loss: 1.2513748879428022e-05
    num_agent_steps_sampled: 958000
    num_agent_steps_trained: 958000
    num_steps_sampled: 958000
    num_steps_trained: 958000
  iterations_since_restore: 958
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,958,23569.7,958000,0,0,0,347.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 959000
  custom_metrics: {}
  date: 2021-10-09_04-57-36
  done: false
  episode_len_mean: 346.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2661
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9321246941884358
          entropy_coeff: 0.009999999999999998
          kl: 0.014259081990156573
          policy_loss: -0.09922115864853064
          total_loss: -0.10937309610760874
          vf_explained_var: -0.5217522382736206
          vf_loss: 3.3191783061031146e-05
    num_agent_steps_sampled: 959000
    num_agent_steps_trained: 959000
    num_steps_sampled: 959000
    num_steps_trained: 959000
  iterations_since_restore: 959
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,959,23593.3,959000,0,0,0,346.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 960000
  custom_metrics: {}
  date: 2021-10-09_04-58-00
  done: false
  episode_len_mean: 345.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2664
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9347857303089566
          entropy_coeff: 0.009999999999999998
          kl: 0.015071093822369678
          policy_loss: -0.0880492769388689
          total_loss: -0.09772206602825059
          vf_explained_var: -0.17024022340774536
          vf_loss: 1.867822492284985e-05
    num_agent_steps_sampled: 960000
    num_agent_steps_trained: 960000
    num_steps_sampled: 960000
    num_steps_trained: 960000
  iterations_since_restore: 960
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,960,23616.7,960000,0,0,0,345.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 961000
  custom_metrics: {}
  date: 2021-10-09_04-58-25
  done: false
  episode_len_mean: 342.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2668
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8253903031349181
          entropy_coeff: 0.009999999999999998
          kl: 0.010763346332930867
          policy_loss: -0.013705201364225811
          total_loss: -0.025047396620114643
          vf_explained_var: 0.03698887676000595
          vf_loss: 1.538698680229168e-05
    num_agent_steps_sampled: 961000
    num_agent_steps_trained: 961000
    num_steps_sampled: 961000
    num_steps_trained: 961000
  iterations_since_restore: 961
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,961,23642.3,961000,0,0,0,342.38




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 962000
  custom_metrics: {}
  date: 2021-10-09_04-59-09
  done: false
  episode_len_mean: 341.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2671
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.906396789020962
          entropy_coeff: 0.009999999999999998
          kl: 0.013829377864051296
          policy_loss: -0.09564027328872018
          total_loss: -0.10581930453578631
          vf_explained_var: -0.8932756781578064
          vf_loss: 2.4140183631971014e-05
    num_agent_steps_sampled: 962000
    num_agent_steps_trained: 962000
    num_steps_sampled: 962000
    num_steps_trained: 962000
  iterations_since_restore: 962
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,962,23685.7,962000,0,0,0,341.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 963000
  custom_metrics: {}
  date: 2021-10-09_04-59-32
  done: false
  episode_len_mean: 340.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2673
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.968661175833808
          entropy_coeff: 0.009999999999999998
          kl: 0.01285239806947496
          policy_loss: -0.07635196258003513
          total_loss: -0.08779112045756644
          vf_explained_var: -0.4499149024486542
          vf_loss: 1.2631503250304376e-05
    num_agent_steps_sampled: 963000
    num_agent_steps_trained: 963000
    num_steps_sampled: 963000
    num_steps_trained: 963000
  iterations_since_restore: 963
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,963,23708.5,963000,0,0,0,340.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 964000
  custom_metrics: {}
  date: 2021-10-09_04-59-55
  done: false
  episode_len_mean: 341.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2676
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.720587468147278
          entropy_coeff: 0.009999999999999998
          kl: 0.02000963101326071
          policy_loss: -0.009129723409811656
          total_loss: 0.2874311188856761
          vf_explained_var: 0.05225416645407677
          vf_loss: 0.30094609224793706
    num_agent_steps_sampled: 964000
    num_agent_steps_trained: 964000
    num_steps_sampled: 964000
    num_steps_trained: 964000
  iterations_since_restore: 964
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,964,23731.7,964000,-0.14,0,-14,341.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 965000
  custom_metrics: {}
  date: 2021-10-09_05-00-23
  done: false
  episode_len_mean: 338.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2679
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8624026775360107
          entropy_coeff: 0.009999999999999998
          kl: 0.007334267227944637
          policy_loss: -0.012106443444887796
          total_loss: -0.018694726165591015
          vf_explained_var: -0.11972237378358841
          vf_loss: 0.004986896911739475
    num_agent_steps_sampled: 965000
    num_agent_steps_trained: 965000
    num_steps_sampled: 965000
    num_steps_trained: 965000
  iterations_since_restore: 965
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,965,23759.7,965000,-0.14,0,-14,338.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 966000
  custom_metrics: {}
  date: 2021-10-09_05-00-47
  done: false
  episode_len_mean: 338.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2682
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8945121844609578
          entropy_coeff: 0.009999999999999998
          kl: 0.012128736589897531
          policy_loss: -0.08996115581442912
          total_loss: -0.0893150562627448
          vf_explained_var: -0.24645374715328217
          vf_loss: 0.007934479875903991
    num_agent_steps_sampled: 966000
    num_agent_steps_trained: 966000
    num_steps_sampled: 966000
    num_steps_trained: 966000
  iterations_since_restore: 966
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,966,23783.3,966000,-0.14,0,-14,338.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 967000
  custom_metrics: {}
  date: 2021-10-09_05-01-11
  done: false
  episode_len_mean: 338.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2685
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9350194480684069
          entropy_coeff: 0.009999999999999998
          kl: 0.015260916110120506
          policy_loss: -0.07445661922295889
          total_loss: -0.07366155036207703
          vf_explained_var: 0.029975276440382004
          vf_loss: 0.005478240712545812
    num_agent_steps_sampled: 967000
    num_agent_steps_trained: 967000
    num_steps_sampled: 967000
    num_steps_trained: 967000
  iterations_since_restore: 967
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,967,23808,967000,-0.14,0,-14,338.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 968000
  custom_metrics: {}
  date: 2021-10-09_05-01-38
  done: false
  episode_len_mean: 335.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2688
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8355393131573996
          entropy_coeff: 0.009999999999999998
          kl: 0.010538279404286123
          policy_loss: -0.07352207439641158
          total_loss: -0.07496787404848469
          vf_explained_var: -0.6456483602523804
          vf_loss: 0.006781422225241032
    num_agent_steps_sampled: 968000
    num_agent_steps_trained: 968000
    num_steps_sampled: 968000
    num_steps_trained: 968000
  iterations_since_restore: 968
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,968,23834.7,968000,-0.14,0,-14,335.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 969000
  custom_metrics: {}
  date: 2021-10-09_05-02-05
  done: false
  episode_len_mean: 333.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 4
  episodes_total: 2692
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6948541733953688
          entropy_coeff: 0.009999999999999998
          kl: 0.011953826886196752
          policy_loss: -0.13262279190950924
          total_loss: -0.13516356605622504
          vf_explained_var: -0.3628486692905426
          vf_loss: 0.002919134598535796
    num_agent_steps_sampled: 969000
    num_agent_steps_trained: 969000
    num_steps_sampled: 969000
    num_steps_trained: 969000
  iterations_since_restore: 969
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,969,23862.2,969000,-0.14,0,-14,333.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 970000
  custom_metrics: {}
  date: 2021-10-09_05-02-27
  done: false
  episode_len_mean: 332.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 2694
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7614529967308044
          entropy_coeff: 0.009999999999999998
          kl: 0.011487785409828522
          policy_loss: -0.07587706386629078
          total_loss: -0.07503508432871765
          vf_explained_var: 0.34323346614837646
          vf_loss: 0.007415783009491861
    num_agent_steps_sampled: 970000
    num_agent_steps_trained: 970000
    num_steps_sampled: 970000
    num_steps_trained: 970000
  iterations_since_restore: 970
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,970,23883.6,970000,-0.14,0,-14,332.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 971000
  custom_metrics: {}
  date: 2021-10-09_05-02-51
  done: false
  episode_len_mean: 332.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2697
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9320191356870864
          entropy_coeff: 0.009999999999999998
          kl: 0.012817488518997382
          policy_loss: -0.08933762086348401
          total_loss: -0.09219518053448862
          vf_explained_var: 0.023212840780615807
          vf_loss: 0.004143948915104071
    num_agent_steps_sampled: 971000
    num_agent_steps_trained: 971000
    num_steps_sampled: 971000
    num_steps_trained: 971000
  iterations_since_restore: 971
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,971,23907.9,971000,-0.14,0,-14,332.74




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 972000
  custom_metrics: {}
  date: 2021-10-09_05-03-32
  done: false
  episode_len_mean: 331.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2700
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8521853182050916
          entropy_coeff: 0.009999999999999998
          kl: 0.014808241680116928
          policy_loss: -0.059677204489707945
          total_loss: -0.06192213023702304
          vf_explained_var: -0.14000293612480164
          vf_loss: 0.0020449603739608494
    num_agent_steps_sampled: 972000
    num_agent_steps_trained: 972000
    num_steps_sampled: 972000
    num_steps_trained: 972000
  iterations_since_restore: 972
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,972,23949,972000,-0.14,0,-14,331.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 973000
  custom_metrics: {}
  date: 2021-10-09_05-03-57
  done: false
  episode_len_mean: 331.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2703
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4467309521304237
          entropy_coeff: 0.009999999999999998
          kl: 0.012481607697357885
          policy_loss: -0.10674754045903682
          total_loss: -0.10621496823926767
          vf_explained_var: -0.279569536447525
          vf_loss: 0.0030040061075447336
    num_agent_steps_sampled: 973000
    num_agent_steps_trained: 973000
    num_steps_sampled: 973000
    num_steps_trained: 973000
  iterations_since_restore: 973
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,973,23974,973000,-0.14,0,-14,331.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 974000
  custom_metrics: {}
  date: 2021-10-09_05-04-20
  done: false
  episode_len_mean: 331.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2706
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.636040038532681
          entropy_coeff: 0.009999999999999998
          kl: 0.01191932665965102
          policy_loss: -0.10466191135346889
          total_loss: -0.107118769052128
          vf_explained_var: -0.7802460789680481
          vf_loss: 0.0024480673174063364
    num_agent_steps_sampled: 974000
    num_agent_steps_trained: 974000
    num_steps_sampled: 974000
    num_steps_trained: 974000
  iterations_since_restore: 974
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,974,23997,974000,-0.14,0,-14,331.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 975000
  custom_metrics: {}
  date: 2021-10-09_05-04-43
  done: false
  episode_len_mean: 330.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2709
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9095009724299112
          entropy_coeff: 0.009999999999999998
          kl: 0.012285865298708802
          policy_loss: -0.067028416113721
          total_loss: -0.07195724054343171
          vf_explained_var: -0.6294282078742981
          vf_loss: 0.00235843879248326
    num_agent_steps_sampled: 975000
    num_agent_steps_trained: 975000
    num_steps_sampled: 975000
    num_steps_trained: 975000
  iterations_since_restore: 975
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,975,24019.8,975000,-0.14,0,-14,330.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 976000
  custom_metrics: {}
  date: 2021-10-09_05-05-09
  done: false
  episode_len_mean: 331.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2712
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.880130684375763
          entropy_coeff: 0.009999999999999998
          kl: 0.011168095057122369
          policy_loss: -0.06833522764758931
          total_loss: -0.07405878454446793
          vf_explained_var: -0.5877977013587952
          vf_loss: 0.0023442720166511007
    num_agent_steps_sampled: 976000
    num_agent_steps_trained: 976000
    num_steps_sampled: 976000
    num_steps_trained: 976000
  iterations_since_restore: 976
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,976,24045.5,976000,-0.14,0,-14,331.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 977000
  custom_metrics: {}
  date: 2021-10-09_05-05-33
  done: false
  episode_len_mean: 331.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2715
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7102944175402324
          entropy_coeff: 0.009999999999999998
          kl: 0.010033531238075079
          policy_loss: -0.02387044189704789
          total_loss: -0.029514800012111663
          vf_explained_var: -0.5803700685501099
          vf_loss: 0.0018155178423815718
    num_agent_steps_sampled: 977000
    num_agent_steps_trained: 977000
    num_steps_sampled: 977000
    num_steps_trained: 977000
  iterations_since_restore: 977
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,977,24069.7,977000,-0.14,0,-14,331.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 978000
  custom_metrics: {}
  date: 2021-10-09_05-06-00
  done: false
  episode_len_mean: 330.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2718
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8484309792518616
          entropy_coeff: 0.009999999999999998
          kl: 0.009456913362596452
          policy_loss: -0.06826026290655136
          total_loss: -0.07654053154918883
          vf_explained_var: -0.5462684035301208
          vf_loss: 0.0011151510297269043
    num_agent_steps_sampled: 978000
    num_agent_steps_trained: 978000
    num_steps_sampled: 978000
    num_steps_trained: 978000
  iterations_since_restore: 978
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,978,24096.2,978000,-0.14,0,-14,330.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 979000
  custom_metrics: {}
  date: 2021-10-09_05-06-21
  done: false
  episode_len_mean: 331.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 2720
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8406805131170485
          entropy_coeff: 0.009999999999999998
          kl: 0.013265869776433981
          policy_loss: -0.059708435388488905
          total_loss: -0.06374532106435961
          vf_explained_var: -0.5261934399604797
          vf_loss: 0.0016203039183488323
    num_agent_steps_sampled: 979000
    num_agent_steps_trained: 979000
    num_steps_sampled: 979000
    num_steps_trained: 979000
  iterations_since_restore: 979
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,979,24117.3,979000,-0.14,0,-14,331.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 980000
  custom_metrics: {}
  date: 2021-10-09_05-06-45
  done: false
  episode_len_mean: 333.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2723
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.882053224245707
          entropy_coeff: 0.009999999999999998
          kl: 0.01143951018966381
          policy_loss: -0.067380809949504
          total_loss: -0.07448605071339343
          vf_explained_var: -0.8288690447807312
          vf_loss: 0.0007209612211833398
    num_agent_steps_sampled: 980000
    num_agent_steps_trained: 980000
    num_steps_sampled: 980000
    num_steps_trained: 980000
  iterations_since_restore: 980
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,980,24141.3,980000,-0.14,0,-14,333.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 981000
  custom_metrics: {}
  date: 2021-10-09_05-07-10
  done: false
  episode_len_mean: 333.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2726
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9106151448355781
          entropy_coeff: 0.009999999999999998
          kl: 0.010902202003735064
          policy_loss: -0.08838616204965445
          total_loss: -0.09601397313591507
          vf_explained_var: -0.964009702205658
          vf_loss: 0.0010004071926232427
    num_agent_steps_sampled: 981000
    num_agent_steps_trained: 981000
    num_steps_sampled: 981000
    num_steps_trained: 981000
  iterations_since_restore: 981
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,981,24166.1,981000,-0.14,0,-14,333.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 982000
  custom_metrics: {}
  date: 2021-10-09_05-07-31
  done: false
  episode_len_mean: 334.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2729
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9392519765430027
          entropy_coeff: 0.009999999999999998
          kl: 0.011775322517621036
          policy_loss: -0.11257058965663115
          total_loss: -0.11977917825182278
          vf_explained_var: -0.825080931186676
          vf_loss: 0.0008668593169810871
    num_agent_steps_sampled: 982000
    num_agent_steps_trained: 982000
    num_steps_sampled: 982000
    num_steps_trained: 982000
  iterations_since_restore: 982
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,982,24187.3,982000,-0.14,0,-14,334.86




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 983000
  custom_metrics: {}
  date: 2021-10-09_05-08-14
  done: false
  episode_len_mean: 335.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2732
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0455261402659946
          entropy_coeff: 0.009999999999999998
          kl: 0.011015484711472961
          policy_loss: -0.08960008497039477
          total_loss: -0.09898545843445593
          vf_explained_var: -0.9871817827224731
          vf_loss: 0.00048307912236648714
    num_agent_steps_sampled: 983000
    num_agent_steps_trained: 983000
    num_steps_sampled: 983000
    num_steps_trained: 983000
  iterations_since_restore: 983
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,983,24231,983000,-0.14,0,-14,335.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 984000
  custom_metrics: {}
  date: 2021-10-09_05-08-39
  done: false
  episode_len_mean: 333.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2735
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7713770919375948
          entropy_coeff: 0.009999999999999998
          kl: 0.009649417746182736
          policy_loss: -0.06367734506105384
          total_loss: -0.07073754967293805
          vf_explained_var: -0.7669277787208557
          vf_loss: 0.0013796668978304498
    num_agent_steps_sampled: 984000
    num_agent_steps_trained: 984000
    num_steps_sampled: 984000
    num_steps_trained: 984000
  iterations_since_restore: 984
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,984,24255.8,984000,-0.14,0,-14,333.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 985000
  custom_metrics: {}
  date: 2021-10-09_05-09-04
  done: false
  episode_len_mean: 335.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2738
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0246341718567744
          entropy_coeff: 0.009999999999999998
          kl: 0.012455813011353574
          policy_loss: -0.09844867040713629
          total_loss: -0.10630137018031544
          vf_explained_var: -0.7566000819206238
          vf_loss: 0.000422559214671815
    num_agent_steps_sampled: 985000
    num_agent_steps_trained: 985000
    num_steps_sampled: 985000
    num_steps_trained: 985000
  iterations_since_restore: 985
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,985,24280.1,985000,-0.14,0,-14,335.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 986000
  custom_metrics: {}
  date: 2021-10-09_05-09-30
  done: false
  episode_len_mean: 335.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2741
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8740947326024373
          entropy_coeff: 0.009999999999999998
          kl: 0.012235883958614698
          policy_loss: -0.103748516821199
          total_loss: -0.11003990848031309
          vf_explained_var: -0.9907447695732117
          vf_loss: 0.0006898405712692895
    num_agent_steps_sampled: 986000
    num_agent_steps_trained: 986000
    num_steps_sampled: 986000
    num_steps_trained: 986000
  iterations_since_restore: 986
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,986,24306.1,986000,-0.14,0,-14,335.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 987000
  custom_metrics: {}
  date: 2021-10-09_05-09-53
  done: false
  episode_len_mean: 335.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2744
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.806494993633694
          entropy_coeff: 0.009999999999999998
          kl: 0.012395127236435656
          policy_loss: -0.07573585580620501
          total_loss: -0.0811406489668621
          vf_explained_var: -0.7521275281906128
          vf_loss: 0.0007473957928596064
    num_agent_steps_sampled: 987000
    num_agent_steps_trained: 987000
    num_steps_sampled: 987000
    num_steps_trained: 987000
  iterations_since_restore: 987
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,987,24328.9,987000,-0.14,0,-14,335.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 988000
  custom_metrics: {}
  date: 2021-10-09_05-10-14
  done: false
  episode_len_mean: 337.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 2746
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8365160385767618
          entropy_coeff: 0.009999999999999998
          kl: 0.014207439000548385
          policy_loss: -0.0944239998029338
          total_loss: -0.0984916608987583
          vf_explained_var: -0.7246049642562866
          vf_loss: 0.0006429572340696016
    num_agent_steps_sampled: 988000
    num_agent_steps_trained: 988000
    num_steps_sampled: 988000
    num_steps_trained: 988000
  iterations_since_restore: 988
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,988,24350.5,988000,-0.14,0,-14,337.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 989000
  custom_metrics: {}
  date: 2021-10-09_05-10-40
  done: false
  episode_len_mean: 337.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 4
  episodes_total: 2750
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0355219152238635
          entropy_coeff: 0.009999999999999998
          kl: 0.01100586423096459
          policy_loss: 0.005447155692511135
          total_loss: -0.003977656923234462
          vf_explained_var: -0.7929739356040955
          vf_loss: 0.0003528468616423197
    num_agent_steps_sampled: 989000
    num_agent_steps_trained: 989000
    num_steps_sampled: 989000
    num_steps_trained: 989000
  iterations_since_restore: 989
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,989,24376.4,989000,-0.14,0,-14,337.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 990000
  custom_metrics: {}
  date: 2021-10-09_05-11-05
  done: false
  episode_len_mean: 337.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2753
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.747481515672472
          entropy_coeff: 0.009999999999999998
          kl: 0.008466252395132576
          policy_loss: -0.11058873670796553
          total_loss: -0.11941037654048867
          vf_explained_var: -0.930537223815918
          vf_loss: 0.0005163974319455317
    num_agent_steps_sampled: 990000
    num_agent_steps_trained: 990000
    num_steps_sampled: 990000
    num_steps_trained: 990000
  iterations_since_restore: 990
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,990,24401.4,990000,-0.14,0,-14,337.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 991000
  custom_metrics: {}
  date: 2021-10-09_05-11-30
  done: false
  episode_len_mean: 338.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2756
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.758606896135542
          entropy_coeff: 0.009999999999999998
          kl: 0.011101027204284047
          policy_loss: -0.027678330035673247
          total_loss: -0.03411876435081164
          vf_explained_var: -0.7364451885223389
          vf_loss: 0.0004766167358158984
    num_agent_steps_sampled: 991000
    num_agent_steps_trained: 991000
    num_steps_sampled: 991000
    num_steps_trained: 991000
  iterations_since_restore: 991
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,991,24425.9,991000,-0.14,0,-14,338.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 992000
  custom_metrics: {}
  date: 2021-10-09_05-11-56
  done: false
  episode_len_mean: 339.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2759
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8411757270495097
          entropy_coeff: 0.009999999999999998
          kl: 0.013301731915115687
          policy_loss: -0.12300021578040388
          total_loss: -0.12817558472355206
          vf_explained_var: -0.29818862676620483
          vf_loss: 0.00045230890391394495
    num_agent_steps_sampled: 992000
    num_agent_steps_trained: 992000
    num_steps_sampled: 992000
    num_steps_trained: 992000
  iterations_since_restore: 992
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,992,24452,992000,-0.14,0,-14,339.41




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 993000
  custom_metrics: {}
  date: 2021-10-09_05-12-36
  done: false
  episode_len_mean: 338.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2762
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8738302244080438
          entropy_coeff: 0.009999999999999998
          kl: 0.0106006128521675
          policy_loss: -0.0887548984752761
          total_loss: -0.09678699852277835
          vf_explained_var: -0.9984621405601501
          vf_loss: 0.0005181236241090422
    num_agent_steps_sampled: 993000
    num_agent_steps_trained: 993000
    num_steps_sampled: 993000
    num_steps_trained: 993000
  iterations_since_restore: 993
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,993,24492.5,993000,-0.14,0,-14,338.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 994000
  custom_metrics: {}
  date: 2021-10-09_05-13-00
  done: false
  episode_len_mean: 338.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2765
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.80612745947308
          entropy_coeff: 0.009999999999999998
          kl: 0.014881180953885575
          policy_loss: -0.08533373114963373
          total_loss: -0.08825611643907097
          vf_explained_var: -0.49829813838005066
          vf_loss: 0.0008368234832434811
    num_agent_steps_sampled: 994000
    num_agent_steps_trained: 994000
    num_steps_sampled: 994000
    num_steps_trained: 994000
  iterations_since_restore: 994
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,994,24516,994000,-0.14,0,-14,338.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 995000
  custom_metrics: {}
  date: 2021-10-09_05-13-25
  done: false
  episode_len_mean: 339.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2768
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 2.011039674282074
          entropy_coeff: 0.009999999999999998
          kl: 0.012690483172759892
          policy_loss: -0.065972960078054
          total_loss: -0.07352397830949889
          vf_explained_var: -0.7453646063804626
          vf_loss: 0.00036275731787706414
    num_agent_steps_sampled: 995000
    num_agent_steps_trained: 995000
    num_steps_sampled: 995000
    num_steps_trained: 995000
  iterations_since_restore: 995
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,995,24540.9,995000,-0.14,0,-14,339.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 996000
  custom_metrics: {}
  date: 2021-10-09_05-13-50
  done: false
  episode_len_mean: 339.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2771
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9776798844337464
          entropy_coeff: 0.009999999999999998
          kl: 0.011789353344338751
          policy_loss: -0.10482380377749602
          total_loss: -0.11301914354165395
          vf_explained_var: -0.914196252822876
          vf_loss: 0.0002508973187003802
    num_agent_steps_sampled: 996000
    num_agent_steps_trained: 996000
    num_steps_sampled: 996000
    num_steps_trained: 996000
  iterations_since_restore: 996
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,996,24566.3,996000,-0.14,0,-14,339.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 997000
  custom_metrics: {}
  date: 2021-10-09_05-14-17
  done: false
  episode_len_mean: 337.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 2774
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7323027822706434
          entropy_coeff: 0.009999999999999998
          kl: 0.012774409391020371
          policy_loss: -0.11312476491762533
          total_loss: -0.11777239108665122
          vf_explained_var: -0.6502625942230225
          vf_loss: 0.000398123575885418
    num_agent_steps_sampled: 997000
    num_agent_steps_trained: 997000
    num_steps_sampled: 997000
    num_steps_trained: 997000
  iterations_since_restore: 997
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,997,24593.1,997000,-0.14,0,-14,337.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 998000
  custom_metrics: {}
  date: 2021-10-09_05-14-42
  done: false
  episode_len_mean: 336.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2777
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8463834722836812
          entropy_coeff: 0.009999999999999998
          kl: 0.010644834481908042
          policy_loss: -0.08106671468251281
          total_loss: -0.0889819572162297
          vf_explained_var: -0.9716154336929321
          vf_loss: 0.0003180146965961386
    num_agent_steps_sampled: 998000
    num_agent_steps_trained: 998000
    num_steps_sampled: 998000
    num_steps_trained: 998000
  iterations_since_restore: 998
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,998,24617.9,998000,0,0,0,336.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 999000
  custom_metrics: {}
  date: 2021-10-09_05-15-07
  done: false
  episode_len_mean: 338.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2780
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8108767019377814
          entropy_coeff: 0.009999999999999998
          kl: 0.012675684696974363
          policy_loss: -0.08325212833782038
          total_loss: -0.08889980800449848
          vf_explained_var: -0.6346710324287415
          vf_loss: 0.00027868947694918863
    num_agent_steps_sampled: 999000
    num_agent_steps_trained: 999000
    num_steps_sampled: 999000
    num_steps_trained: 999000
  iterations_since_restore: 999
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,999,24642.9,999000,0,0,0,338.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1000000
  custom_metrics: {}
  date: 2021-10-09_05-15-31
  done: false
  episode_len_mean: 337.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2783
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9255613989300198
          entropy_coeff: 0.009999999999999998
          kl: 0.011255887609771836
          policy_loss: -0.05694285591857301
          total_loss: -0.06522456200586425
          vf_explained_var: -0.8452459573745728
          vf_loss: 0.0001560547679724146
    num_agent_steps_sampled: 1000000
    num_agent_steps_trained: 1000000
    num_steps_sampled: 1000000
    num_steps_trained: 1000000
  iterations_since_restore: 1000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1000,24666.7,1000000,0,0,0,337.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1001000
  custom_metrics: {}
  date: 2021-10-09_05-15-54
  done: false
  episode_len_mean: 337.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2786
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9992603845066494
          entropy_coeff: 0.009999999999999998
          kl: 0.012379604591846238
          policy_loss: -0.04240743739323483
          total_loss: -0.05031788800325659
          vf_explained_var: -0.9316735863685608
          vf_loss: 0.00018431311643022733
    num_agent_steps_sampled: 1001000
    num_agent_steps_trained: 1001000
    num_steps_sampled: 1001000
    num_steps_trained: 1001000
  iterations_since_restore: 1001


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1001,24690.3,1001000,0,0,0,337.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1002000
  custom_metrics: {}
  date: 2021-10-09_05-16-19
  done: false
  episode_len_mean: 338.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2789
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9708523988723754
          entropy_coeff: 0.009999999999999998
          kl: 0.01013595395574952
          policy_loss: -0.04560197260644701
          total_loss: -0.05548859464211597
          vf_explained_var: -0.6598167419433594
          vf_loss: 8.039971233099802e-05
    num_agent_steps_sampled: 1002000
    num_agent_steps_trained: 1002000
    num_steps_sampled: 1002000
    num_steps_trained: 1002000
  iterations_since_restore: 1002
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1002,24715.4,1002000,0,0,0,338.63




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1003000
  custom_metrics: {}
  date: 2021-10-09_05-17-02
  done: false
  episode_len_mean: 340.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2792
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8912110262446933
          entropy_coeff: 0.009999999999999998
          kl: 0.006899872125009981
          policy_loss: 0.011597876581880781
          total_loss: 0.04566100090742111
          vf_explained_var: -0.04794967547059059
          vf_loss: 0.04634388007689268
    num_agent_steps_sampled: 1003000
    num_agent_steps_trained: 1003000
    num_steps_sampled: 1003000
    num_steps_trained: 1003000
  iterations_since_restore: 1003


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1003,24758.2,1003000,-0.03,0,-3,340.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1004000
  custom_metrics: {}
  date: 2021-10-09_05-17-26
  done: false
  episode_len_mean: 338.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2795
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.858232639895545
          entropy_coeff: 0.009999999999999998
          kl: 0.013000087279706
          policy_loss: -0.07748683335052596
          total_loss: -0.07749806905372275
          vf_explained_var: 0.02848857454955578
          vf_loss: 0.00607691501500085
    num_agent_steps_sampled: 1004000
    num_agent_steps_trained: 1004000
    num_steps_sampled: 1004000
    num_steps_trained: 1004000
  iterations_since_restore: 1004
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1004,24782,1004000,-0.03,0,-3,338.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1005000
  custom_metrics: {}
  date: 2021-10-09_05-17-50
  done: false
  episode_len_mean: 338.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2798
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6092990239461262
          entropy_coeff: 0.009999999999999998
          kl: 0.012860131590711818
          policy_loss: -0.10212569379558166
          total_loss: -0.10234308929906952
          vf_explained_var: 0.08596944063901901
          vf_loss: 0.0035159321036189795
    num_agent_steps_sampled: 1005000
    num_agent_steps_trained: 1005000
    num_steps_sampled: 1005000
    num_steps_trained: 1005000
  iterations_since_restore: 100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1005,24806.4,1005000,-0.03,0,-3,338.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1006000
  custom_metrics: {}
  date: 2021-10-09_05-18-14
  done: false
  episode_len_mean: 338.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 2800
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.832224252488878
          entropy_coeff: 0.009999999999999998
          kl: 0.010614475085539738
          policy_loss: -0.07945428954230414
          total_loss: -0.08595270613829295
          vf_explained_var: 0.06263411045074463
          vf_loss: 0.0016224237417595253
    num_agent_steps_sampled: 1006000
    num_agent_steps_trained: 1006000
    num_steps_sampled: 1006000
    num_steps_trained: 1006000
  iterations_since_restore: 1006

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1006,24829.7,1006000,-0.03,0,-3,338.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1007000
  custom_metrics: {}
  date: 2021-10-09_05-18-34
  done: false
  episode_len_mean: 341.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2803
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8786856532096863
          entropy_coeff: 0.009999999999999998
          kl: 0.011325529067826265
          policy_loss: -0.13239497519615623
          total_loss: -0.1392434795283609
          vf_explained_var: -0.9018452763557434
          vf_loss: 0.0010535647116032326
    num_agent_steps_sampled: 1007000
    num_agent_steps_trained: 1007000
    num_steps_sampled: 1007000
    num_steps_trained: 1007000
  iterations_since_restore: 1007

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1007,24849.8,1007000,-0.03,0,-3,341.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1008000
  custom_metrics: {}
  date: 2021-10-09_05-18-56
  done: false
  episode_len_mean: 341.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 2805
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9765128797954983
          entropy_coeff: 0.009999999999999998
          kl: 0.01231664408882046
          policy_loss: -0.1142447832143969
          total_loss: -0.12154679927561018
          vf_explained_var: -0.9820522665977478
          vf_loss: 0.0006257812317926436
    num_agent_steps_sampled: 1008000
    num_agent_steps_trained: 1008000
    num_steps_sampled: 1008000
    num_steps_trained: 1008000
  iterations_since_restore: 1008


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1008,24871.7,1008000,-0.03,0,-3,341.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1009000
  custom_metrics: {}
  date: 2021-10-09_05-19-20
  done: false
  episode_len_mean: 341.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2808
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9523841977119445
          entropy_coeff: 0.009999999999999998
          kl: 0.011423926891961303
          policy_loss: -0.055493912431928846
          total_loss: -0.06362822904354996
          vf_explained_var: -0.3670152425765991
          vf_loss: 0.00041017249362388004
    num_agent_steps_sampled: 1009000
    num_agent_steps_trained: 1009000
    num_steps_sampled: 1009000
    num_steps_trained: 1009000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1009,24896.2,1009000,-0.03,0,-3,341.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1010000
  custom_metrics: {}
  date: 2021-10-09_05-19-43
  done: false
  episode_len_mean: 341.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2811
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.955671078628964
          entropy_coeff: 0.009999999999999998
          kl: 0.011333567248535244
          policy_loss: -0.07379286102950573
          total_loss: -0.08203299761646324
          vf_explained_var: -0.15659774839878082
          vf_loss: 0.0004240622473945324
    num_agent_steps_sampled: 1010000
    num_agent_steps_trained: 1010000
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
  iterations_since_restore: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1010,24919.1,1010000,-0.03,0,-3,341.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1011000
  custom_metrics: {}
  date: 2021-10-09_05-20-05
  done: false
  episode_len_mean: 343.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2814
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9610987689759996
          entropy_coeff: 0.009999999999999998
          kl: 0.010062829536299332
          policy_loss: -0.08225645112494627
          total_loss: -0.09159902127252685
          vf_explained_var: -0.46532341837882996
          vf_loss: 0.0005971896918102478
    num_agent_steps_sampled: 1011000
    num_agent_steps_trained: 1011000
    num_steps_sampled: 1011000
    num_steps_trained: 1011000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1011,24941.3,1011000,-0.03,0,-3,343.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1012000
  custom_metrics: {}
  date: 2021-10-09_05-20-28
  done: false
  episode_len_mean: 344.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 2816
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9472824957635668
          entropy_coeff: 0.009999999999999998
          kl: 0.011265044983095536
          policy_loss: -0.11157889970474773
          total_loss: -0.11977395007593764
          vf_explained_var: -0.9188792109489441
          vf_loss: 0.0004511220167235782
    num_agent_steps_sampled: 1012000
    num_agent_steps_trained: 1012000
    num_steps_sampled: 1012000
    num_steps_trained: 1012000
  iterations_since_restore: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1012,24963.8,1012000,-0.03,0,-3,344.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1013000
  custom_metrics: {}
  date: 2021-10-09_05-20-51
  done: false
  episode_len_mean: 343.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2819
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8373127751880223
          entropy_coeff: 0.009999999999999998
          kl: 0.013409161362585959
          policy_loss: -0.11059213272399372
          total_loss: -0.11566728129982948
          vf_explained_var: -0.5939290523529053
          vf_loss: 0.0004106500105182123
    num_agent_steps_sampled: 1013000
    num_agent_steps_trained: 1013000
    num_steps_sampled: 1013000
    num_steps_trained: 1013000
  iterations_since_restore: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1013,24987.2,1013000,-0.03,0,-3,343.99




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1014000
  custom_metrics: {}
  date: 2021-10-09_05-21-34
  done: false
  episode_len_mean: 342.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 4
  episodes_total: 2823
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.82058281633589
          entropy_coeff: 0.009999999999999998
          kl: 0.013130150229420323
          policy_loss: -0.11173679811569552
          total_loss: -0.11707545359515481
          vf_explained_var: -0.6771278977394104
          vf_loss: 0.00024799872529304896
    num_agent_steps_sampled: 1014000
    num_agent_steps_trained: 1014000
    num_steps_sampled: 1014000
    num_steps_trained: 1014000
  iterations_since_restore: 1014

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1014,25030.1,1014000,-0.03,0,-3,342.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1015000
  custom_metrics: {}
  date: 2021-10-09_05-21-59
  done: false
  episode_len_mean: 342.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 2825
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.799712720182207
          entropy_coeff: 0.009999999999999998
          kl: 0.013630392929480253
          policy_loss: -0.08577600566463338
          total_loss: -0.0902572298836377
          vf_explained_var: -0.9047141075134277
          vf_loss: 0.00041595112529143485
    num_agent_steps_sampled: 1015000
    num_agent_steps_trained: 1015000
    num_steps_sampled: 1015000
    num_steps_trained: 1015000
  iterations_since_restore: 1015

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1015,25054.4,1015000,-0.03,0,-3,342.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1016000
  custom_metrics: {}
  date: 2021-10-09_05-22-21
  done: false
  episode_len_mean: 342.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2828
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8481524533695646
          entropy_coeff: 0.009999999999999998
          kl: 0.013413121865446288
          policy_loss: -0.08256398451824982
          total_loss: -0.08791725606554085
          vf_explained_var: -0.20076937973499298
          vf_loss: 0.00023711652570960318
    num_agent_steps_sampled: 1016000
    num_agent_steps_trained: 1016000
    num_steps_sampled: 1016000
    num_steps_trained: 1016000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1016,25077.1,1016000,-0.03,0,-3,342.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1017000
  custom_metrics: {}
  date: 2021-10-09_05-22-44
  done: false
  episode_len_mean: 343.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2831
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8203602075576781
          entropy_coeff: 0.009999999999999998
          kl: 0.009613542516069688
          policy_loss: -0.0661102662483851
          total_loss: -0.07450295752949185
          vf_explained_var: -0.4247657358646393
          vf_loss: 0.0005714862033427279
    num_agent_steps_sampled: 1017000
    num_agent_steps_trained: 1017000
    num_steps_sampled: 1017000
    num_steps_trained: 1017000
  iterations_since_restore: 1017

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1017,25100.3,1017000,-0.03,0,-3,343.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1018000
  custom_metrics: {}
  date: 2021-10-09_05-23-08
  done: false
  episode_len_mean: 344.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2834
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.928065115875668
          entropy_coeff: 0.009999999999999998
          kl: 0.01485653353514675
          policy_loss: -0.08767808435691728
          total_loss: -0.09233496503697501
          vf_explained_var: -0.20126402378082275
          vf_loss: 0.0003453957503855539
    num_agent_steps_sampled: 1018000
    num_agent_steps_trained: 1018000
    num_steps_sampled: 1018000
    num_steps_trained: 1018000
  iterations_since_restore: 1018

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1018,25123.5,1018000,-0.03,0,-3,344.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1019000
  custom_metrics: {}
  date: 2021-10-09_05-23-30
  done: false
  episode_len_mean: 344.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2837
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9369556056128607
          entropy_coeff: 0.009999999999999998
          kl: 0.01221030488529809
          policy_loss: -0.05055988043960598
          total_loss: -0.058001119995282756
          vf_explained_var: -0.7266709804534912
          vf_loss: 0.00019318858782450357
    num_agent_steps_sampled: 1019000
    num_agent_steps_trained: 1019000
    num_steps_sampled: 1019000
    num_steps_trained: 1019000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1019,25146.2,1019000,-0.03,0,-3,344.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1020000
  custom_metrics: {}
  date: 2021-10-09_05-23-53
  done: false
  episode_len_mean: 345.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 2839
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5760249336560568
          entropy_coeff: 0.009999999999999998
          kl: 0.011530469519572881
          policy_loss: -0.08972004246380594
          total_loss: -0.09407007015413708
          vf_explained_var: -0.20985832810401917
          vf_loss: 0.00032847026579677026
    num_agent_steps_sampled: 1020000
    num_agent_steps_trained: 1020000
    num_steps_sampled: 1020000
    num_steps_trained: 1020000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1020,25168.8,1020000,-0.03,0,-3,345.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1021000
  custom_metrics: {}
  date: 2021-10-09_05-24-20
  done: false
  episode_len_mean: 345.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2842
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.859023752477434
          entropy_coeff: 0.009999999999999998
          kl: 0.010642869020172757
          policy_loss: -0.09666615029176076
          total_loss: -0.10487025284932719
          vf_explained_var: -0.48683789372444153
          vf_loss: 0.00015744120472744626
    num_agent_steps_sampled: 1021000
    num_agent_steps_trained: 1021000
    num_steps_sampled: 1021000
    num_steps_trained: 1021000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1021,25195.9,1021000,-0.03,0,-3,345.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1022000
  custom_metrics: {}
  date: 2021-10-09_05-24-43
  done: false
  episode_len_mean: 344.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2845
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0536352899339465
          entropy_coeff: 0.009999999999999998
          kl: 0.011525822636857717
          policy_loss: -0.13891680749754112
          total_loss: -0.1482664375876387
          vf_explained_var: -0.8375902771949768
          vf_loss: 0.00010943394792977617
    num_agent_steps_sampled: 1022000
    num_agent_steps_trained: 1022000
    num_steps_sampled: 1022000
    num_steps_trained: 1022000
  iterations_since_restore: 1022

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1022,25218.9,1022000,-0.03,0,-3,344.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1023000
  custom_metrics: {}
  date: 2021-10-09_05-25-04
  done: false
  episode_len_mean: 346.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2848
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9125899261898465
          entropy_coeff: 0.009999999999999998
          kl: 0.004448287588747579
          policy_loss: -0.012181721296575335
          total_loss: -0.02081298565543774
          vf_explained_var: -0.4532717764377594
          vf_loss: 0.00621945576761694
    num_agent_steps_sampled: 1023000
    num_agent_steps_trained: 1023000
    num_steps_sampled: 1023000
    num_steps_trained: 1023000
  iterations_since_restore: 1023

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1023,25239.2,1023000,-0.03,0,-3,346.37




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1024000
  custom_metrics: {}
  date: 2021-10-09_05-25-42
  done: false
  episode_len_mean: 347.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 2850
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9284803615676032
          entropy_coeff: 0.009999999999999998
          kl: 0.01362215663586114
          policy_loss: -0.11830940447333786
          total_loss: -0.13047994261400567
          vf_explained_var: -0.1980636864900589
          vf_loss: 0.0005682482943585557
    num_agent_steps_sampled: 1024000
    num_agent_steps_trained: 1024000
    num_steps_sampled: 1024000
    num_steps_trained: 1024000
  iterations_since_restore: 1024

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1024,25277.6,1024000,-0.03,0,-3,347.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1025000
  custom_metrics: {}
  date: 2021-10-09_05-26-05
  done: false
  episode_len_mean: 349.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2853
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.081825511985355
          entropy_coeff: 0.009999999999999998
          kl: 0.012034662737138612
          policy_loss: -0.12051316327932808
          total_loss: -0.13522425805115038
          vf_explained_var: -0.5952589511871338
          vf_loss: 0.0003240001606172882
    num_agent_steps_sampled: 1025000
    num_agent_steps_trained: 1025000
    num_steps_sampled: 1025000
    num_steps_trained: 1025000
  iterations_since_restore: 1025

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1025,25300.9,1025000,-0.03,0,-3,349.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1026000
  custom_metrics: {}
  date: 2021-10-09_05-26-28
  done: false
  episode_len_mean: 351.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 2855
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8353258980645073
          entropy_coeff: 0.009999999999999998
          kl: 0.01783520530722572
          policy_loss: -0.053973382690714465
          total_loss: -0.06330122088806497
          vf_explained_var: -0.12047605216503143
          vf_loss: 0.0004548563896807738
    num_agent_steps_sampled: 1026000
    num_agent_steps_trained: 1026000
    num_steps_sampled: 1026000
    num_steps_trained: 1026000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1026,25323.4,1026000,-0.03,0,-3,351.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1027000
  custom_metrics: {}
  date: 2021-10-09_05-26-46
  done: false
  episode_len_mean: 353.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2858
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4429269219438234
          entropy_coeff: 0.009999999999999998
          kl: 0.01901287189222026
          policy_loss: 0.14334449188576803
          total_loss: 0.1388344570994377
          vf_explained_var: -0.010849005542695522
          vf_loss: 0.0007827516069584009
    num_agent_steps_sampled: 1027000
    num_agent_steps_trained: 1027000
    num_steps_sampled: 1027000
    num_steps_trained: 1027000
  iterations_since_restore: 1027


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1027,25341.6,1027000,-0.03,0,-3,353.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1028000
  custom_metrics: {}
  date: 2021-10-09_05-27-12
  done: false
  episode_len_mean: 353.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2861
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.485919623904758
          entropy_coeff: 0.009999999999999998
          kl: 0.01345855798491758
          policy_loss: -0.045696606528427865
          total_loss: -0.053726227374540435
          vf_explained_var: 0.2086278647184372
          vf_loss: 0.0003621718783203202
    num_agent_steps_sampled: 1028000
    num_agent_steps_trained: 1028000
    num_steps_sampled: 1028000
    num_steps_trained: 1028000
  iterations_since_restore: 1028

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1028,25368,1028000,-0.03,0,-3,353.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1029000
  custom_metrics: {}
  date: 2021-10-09_05-27-42
  done: false
  episode_len_mean: 349.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 4
  episodes_total: 2865
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1227360553211636
          entropy_coeff: 0.009999999999999998
          kl: 0.062188718313337395
          policy_loss: -0.06275870932473077
          total_loss: -0.04383582456244363
          vf_explained_var: 0.668714702129364
          vf_loss: 0.0002659535241643122
    num_agent_steps_sampled: 1029000
    num_agent_steps_trained: 1029000
    num_steps_sampled: 1029000
    num_steps_trained: 1029000
  iterations_since_restore: 1029


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1029,25397.5,1029000,-0.03,0,-3,349.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1030000
  custom_metrics: {}
  date: 2021-10-09_05-28-07
  done: false
  episode_len_mean: 348.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2868
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.3170827468236288
          entropy_coeff: 0.009999999999999998
          kl: 0.01788688513921901
          policy_loss: -0.01867297494577037
          total_loss: -0.01859713883863555
          vf_explained_var: 0.5552991032600403
          vf_loss: 0.00035356187565614364
    num_agent_steps_sampled: 1030000
    num_agent_steps_trained: 1030000
    num_steps_sampled: 1030000
    num_steps_trained: 1030000
  iterations_since_restore: 1030

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1030,25422.9,1030000,-0.03,0,-3,348.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1031000
  custom_metrics: {}
  date: 2021-10-09_05-28-31
  done: false
  episode_len_mean: 347.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2871
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.3034065577718947
          entropy_coeff: 0.009999999999999998
          kl: 0.009371303690398344
          policy_loss: 0.003806172725227144
          total_loss: -0.0017660144716501237
          vf_explained_var: 0.21186436712741852
          vf_loss: 0.0007069225523284533
    num_agent_steps_sampled: 1031000
    num_agent_steps_trained: 1031000
    num_steps_sampled: 1031000
    num_steps_trained: 1031000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1031,25446,1031000,-0.03,0,-3,347.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1032000
  custom_metrics: {}
  date: 2021-10-09_05-28-54
  done: false
  episode_len_mean: 347.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 4
  episodes_total: 2875
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.6743212302525838
          entropy_coeff: 0.009999999999999998
          kl: 0.016398772243407262
          policy_loss: -0.06405864548352029
          total_loss: -0.06878334859179126
          vf_explained_var: 0.76945561170578
          vf_loss: 0.0001980560729053751
    num_agent_steps_sampled: 1032000
    num_agent_steps_trained: 1032000
    num_steps_sampled: 1032000
    num_steps_trained: 1032000
  iterations_since_restore: 1032
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1032,25469.4,1032000,-0.03,0,-3,347.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1033000
  custom_metrics: {}
  date: 2021-10-09_05-29-14
  done: false
  episode_len_mean: 348.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 2877
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.7851634038819206
          entropy_coeff: 0.009999999999999998
          kl: 0.01032185484545055
          policy_loss: 0.038012649967438646
          total_loss: 0.0278416786963741
          vf_explained_var: 0.5892274379730225
          vf_loss: 0.00024053479808369756
    num_agent_steps_sampled: 1033000
    num_agent_steps_trained: 1033000
    num_steps_sampled: 1033000
    num_steps_trained: 1033000
  iterations_since_restore: 1033
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1033,25489.9,1033000,-0.03,0,-3,348.1




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1034000
  custom_metrics: {}
  date: 2021-10-09_05-29-52
  done: false
  episode_len_mean: 349.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2880
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.7438493993547228
          entropy_coeff: 0.009999999999999998
          kl: 0.018235732964478737
          policy_loss: 0.017058799063993824
          total_loss: 0.012960410180191199
          vf_explained_var: 0.5099976658821106
          vf_loss: 0.00019555169314521158
    num_agent_steps_sampled: 1034000
    num_agent_steps_trained: 1034000
    num_steps_sampled: 1034000
    num_steps_trained: 1034000
  iterations_since_restore: 103

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1034,25527.7,1034000,-0.03,0,-3,349.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1035000
  custom_metrics: {}
  date: 2021-10-09_05-30-18
  done: false
  episode_len_mean: 349.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2883
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.993871765666538
          entropy_coeff: 0.009999999999999998
          kl: 0.01476601563989206
          policy_loss: -0.11356498396231068
          total_loss: -0.12263375566237503
          vf_explained_var: 0.03966198116540909
          vf_loss: 0.00022641043833573348
    num_agent_steps_sampled: 1035000
    num_agent_steps_trained: 1035000
    num_steps_sampled: 1035000
    num_steps_trained: 1035000
  iterations_since_restore: 1035

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1035,25553.7,1035000,-0.03,0,-3,349.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1036000
  custom_metrics: {}
  date: 2021-10-09_05-30-43
  done: false
  episode_len_mean: 348.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2886
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.9100601037343343
          entropy_coeff: 0.009999999999999998
          kl: 0.015518607368617647
          policy_loss: -0.12277849697404437
          total_loss: -0.13057779032323097
          vf_explained_var: -0.7085515260696411
          vf_loss: 0.0001152915885743116
    num_agent_steps_sampled: 1036000
    num_agent_steps_trained: 1036000
    num_steps_sampled: 1036000
    num_steps_trained: 1036000
  iterations_since_restore: 103

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1036,25578.3,1036000,-0.03,0,-3,348.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1037000
  custom_metrics: {}
  date: 2021-10-09_05-31-07
  done: false
  episode_len_mean: 349.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 2889
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.9088326123025683
          entropy_coeff: 0.009999999999999998
          kl: 0.012472417183372286
          policy_loss: -0.05939553011622694
          total_loss: -0.06941036470234394
          vf_explained_var: -1.0
          vf_loss: 8.321142393267817e-05
    num_agent_steps_sampled: 1037000
    num_agent_steps_trained: 1037000
    num_steps_sampled: 1037000
    num_steps_trained: 1037000
  iterations_since_restore: 1037
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1037,25602.3,1037000,-0.03,0,-3,349.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1038000
  custom_metrics: {}
  date: 2021-10-09_05-31-29
  done: false
  episode_len_mean: 350.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2892
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.917001454035441
          entropy_coeff: 0.009999999999999998
          kl: 0.012985240161728641
          policy_loss: -0.10413630886210336
          total_loss: -0.11377935475773282
          vf_explained_var: -0.6030008792877197
          vf_loss: 0.00016703839565808368
    num_agent_steps_sampled: 1038000
    num_agent_steps_trained: 1038000
    num_steps_sampled: 1038000
    num_steps_trained: 1038000
  iterations_since_restore: 1038
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1038,25624.6,1038000,0,0,0,350.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1039000
  custom_metrics: {}
  date: 2021-10-09_05-31-54
  done: false
  episode_len_mean: 349.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2895
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.9568216443061828
          entropy_coeff: 0.009999999999999998
          kl: 0.012093383923990054
          policy_loss: -0.11441124739746253
          total_loss: -0.12521398017803828
          vf_explained_var: -0.20424476265907288
          vf_loss: 4.841410126472409e-05
    num_agent_steps_sampled: 1039000
    num_agent_steps_trained: 1039000
    num_steps_sampled: 1039000
    num_steps_trained: 1039000
  iterations_since_restore: 1039


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1039,25649.2,1039000,0,0,0,349.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1040000
  custom_metrics: {}
  date: 2021-10-09_05-32-16
  done: false
  episode_len_mean: 351.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2897
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.8789209696981641
          entropy_coeff: 0.009999999999999998
          kl: 0.012251740521757289
          policy_loss: -0.04110596500751045
          total_loss: -0.05098920351722174
          vf_explained_var: -0.4323626756668091
          vf_loss: 7.47562522721839e-05
    num_agent_steps_sampled: 1040000
    num_agent_steps_trained: 1040000
    num_steps_sampled: 1040000
    num_steps_trained: 1040000
  iterations_since_restore: 1040
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1040,25671.7,1040000,0,0,0,351.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1041000
  custom_metrics: {}
  date: 2021-10-09_05-32-38
  done: false
  episode_len_mean: 351.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2900
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.8454145815637377
          entropy_coeff: 0.009999999999999998
          kl: 0.01428680867664901
          policy_loss: -0.06215836718264553
          total_loss: -0.07023543916228744
          vf_explained_var: -0.7019833326339722
          vf_loss: 7.895872501345972e-05
    num_agent_steps_sampled: 1041000
    num_agent_steps_trained: 1041000
    num_steps_sampled: 1041000
    num_steps_trained: 1041000
  iterations_since_restore: 1041
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1041,25693.2,1041000,0,0,0,351.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1042000
  custom_metrics: {}
  date: 2021-10-09_05-33-00
  done: false
  episode_len_mean: 350.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2903
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.812153034740024
          entropy_coeff: 0.009999999999999998
          kl: 0.012906963630060058
          policy_loss: -0.0906903789482183
          total_loss: -0.09944455847144126
          vf_explained_var: -0.23102013766765594
          vf_loss: 6.384178597929551e-05
    num_agent_steps_sampled: 1042000
    num_agent_steps_trained: 1042000
    num_steps_sampled: 1042000
    num_steps_trained: 1042000
  iterations_since_restore: 1042
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1042,25714.9,1042000,0,0,0,350.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1043000
  custom_metrics: {}
  date: 2021-10-09_05-33-22
  done: false
  episode_len_mean: 350.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2905
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.9654724770122105
          entropy_coeff: 0.009999999999999998
          kl: 0.011705446955429426
          policy_loss: -0.044872789540224604
          total_loss: -0.05603824419279893
          vf_explained_var: -0.7570082545280457
          vf_loss: 5.183059129194589e-05
    num_agent_steps_sampled: 1043000
    num_agent_steps_trained: 1043000
    num_steps_sampled: 1043000
    num_steps_trained: 1043000
  iterations_since_restore: 1043


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1043,25737,1043000,0,0,0,350.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1044000
  custom_metrics: {}
  date: 2021-10-09_05-33-44
  done: false
  episode_len_mean: 351.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2908
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.8232814364963108
          entropy_coeff: 0.009999999999999998
          kl: 0.01438555881770557
          policy_loss: -0.12122767468293508
          total_loss: -0.1290202982723713
          vf_explained_var: -0.2292933613061905
          vf_loss: 7.089340599324917e-05
    num_agent_steps_sampled: 1044000
    num_agent_steps_trained: 1044000
    num_steps_sampled: 1044000
    num_steps_trained: 1044000
  iterations_since_restore: 1044
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1044,25759.5,1044000,0,0,0,351.4




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1045000
  custom_metrics: {}
  date: 2021-10-09_05-34-24
  done: false
  episode_len_mean: 351.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2911
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.8954731106758118
          entropy_coeff: 0.009999999999999998
          kl: 0.013104018814226098
          policy_loss: -0.056565497484472066
          total_loss: -0.06603668497668372
          vf_explained_var: -0.11785358935594559
          vf_loss: 3.7999371377534566e-05
    num_agent_steps_sampled: 1045000
    num_agent_steps_trained: 1045000
    num_steps_sampled: 1045000
    num_steps_trained: 1045000
  iterations_since_restore: 104

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1045,25799.3,1045000,0,0,0,351.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1046000
  custom_metrics: {}
  date: 2021-10-09_05-34-48
  done: false
  episode_len_mean: 350.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2913
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.906260401672787
          entropy_coeff: 0.009999999999999998
          kl: 0.010829931367771984
          policy_loss: -0.12129966728389263
          total_loss: -0.13252027912272346
          vf_explained_var: -0.9353517889976501
          vf_loss: 3.563769302369716e-05
    num_agent_steps_sampled: 1046000
    num_agent_steps_trained: 1046000
    num_steps_sampled: 1046000
    num_steps_trained: 1046000
  iterations_since_restore: 1046
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1046,25823.2,1046000,0,0,0,350.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1047000
  custom_metrics: {}
  date: 2021-10-09_05-35-09
  done: false
  episode_len_mean: 351.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2916
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 2.0186487873395285
          entropy_coeff: 0.009999999999999998
          kl: 0.011114092136963292
          policy_loss: -0.0538364266562793
          total_loss: -0.06598764003978835
          vf_explained_var: -0.5576979517936707
          vf_loss: 2.4090328436917238e-05
    num_agent_steps_sampled: 1047000
    num_agent_steps_trained: 1047000
    num_steps_sampled: 1047000
    num_steps_trained: 1047000
  iterations_since_restore: 1047
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1047,25844.6,1047000,0,0,0,351.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1048000
  custom_metrics: {}
  date: 2021-10-09_05-35-33
  done: false
  episode_len_mean: 351.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2919
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.6792409022649128
          entropy_coeff: 0.009999999999999998
          kl: 0.011415059541893414
          policy_loss: -0.08574039474543597
          total_loss: -0.09426780597617229
          vf_explained_var: -0.843005895614624
          vf_loss: 3.6875329998211884e-05
    num_agent_steps_sampled: 1048000
    num_agent_steps_trained: 1048000
    num_steps_sampled: 1048000
    num_steps_trained: 1048000
  iterations_since_restore: 1048
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1048,25868.1,1048000,0,0,0,351.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1049000
  custom_metrics: {}
  date: 2021-10-09_05-35-55
  done: false
  episode_len_mean: 353.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2922
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.7996881061130099
          entropy_coeff: 0.009999999999999998
          kl: 0.01237257740295574
          policy_loss: -0.10487533937104875
          total_loss: -0.11392463147640228
          vf_explained_var: -0.2143867313861847
          vf_loss: 2.9271400348483845e-05
    num_agent_steps_sampled: 1049000
    num_agent_steps_trained: 1049000
    num_steps_sampled: 1049000
    num_steps_trained: 1049000
  iterations_since_restore: 1049
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1049,25890.5,1049000,0,0,0,353.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1050000
  custom_metrics: {}
  date: 2021-10-09_05-36-18
  done: false
  episode_len_mean: 354.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2924
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.7388201276461284
          entropy_coeff: 0.009999999999999998
          kl: 0.010502071161799856
          policy_loss: -0.06389611028134823
          total_loss: -0.07366787108282248
          vf_explained_var: -0.7361021041870117
          vf_loss: 4.64119224488968e-05
    num_agent_steps_sampled: 1050000
    num_agent_steps_trained: 1050000
    num_steps_sampled: 1050000
    num_steps_trained: 1050000
  iterations_since_restore: 1050
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1050,25913.2,1050000,0,0,0,354.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1051000
  custom_metrics: {}
  date: 2021-10-09_05-36-40
  done: false
  episode_len_mean: 354.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2927
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.8050130248069762
          entropy_coeff: 0.009999999999999998
          kl: 0.01618272117626418
          policy_loss: -0.0693469108806716
          total_loss: -0.07570076363368167
          vf_explained_var: -0.20621177554130554
          vf_loss: 3.1561635735973445e-05
    num_agent_steps_sampled: 1051000
    num_agent_steps_trained: 1051000
    num_steps_sampled: 1051000
    num_steps_trained: 1051000
  iterations_since_restore: 1051
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1051,25934.8,1051000,0,0,0,354.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1052000
  custom_metrics: {}
  date: 2021-10-09_05-37-04
  done: false
  episode_len_mean: 354.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2930
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.907525074481964
          entropy_coeff: 0.009999999999999998
          kl: 0.016632095896944872
          policy_loss: -0.01603956901364856
          total_loss: -0.023105687441097367
          vf_explained_var: -0.41394636034965515
          vf_loss: 2.0501656813899495e-05
    num_agent_steps_sampled: 1052000
    num_agent_steps_trained: 1052000
    num_steps_sampled: 1052000
    num_steps_trained: 1052000
  iterations_since_restore: 1052

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1052,25959.1,1052000,0,0,0,354.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1053000
  custom_metrics: {}
  date: 2021-10-09_05-37-27
  done: false
  episode_len_mean: 354.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2933
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.6660734719700283
          entropy_coeff: 0.009999999999999998
          kl: 0.015283390433610735
          policy_loss: -0.060969344795578057
          total_loss: -0.06657394886844688
          vf_explained_var: -0.5408734083175659
          vf_loss: 3.9664218901533684e-05
    num_agent_steps_sampled: 1053000
    num_agent_steps_trained: 1053000
    num_steps_sampled: 1053000
    num_steps_trained: 1053000
  iterations_since_restore: 1053

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1053,25982.5,1053000,0,0,0,354.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1054000
  custom_metrics: {}
  date: 2021-10-09_05-37-49
  done: false
  episode_len_mean: 355.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2935
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.9557621240615846
          entropy_coeff: 0.009999999999999998
          kl: 0.010794469003811043
          policy_loss: -0.14308327730331155
          total_loss: -0.15483450657791561
          vf_explained_var: -0.14988848567008972
          vf_loss: 2.55990304847526e-05
    num_agent_steps_sampled: 1054000
    num_agent_steps_trained: 1054000
    num_steps_sampled: 1054000
    num_steps_trained: 1054000
  iterations_since_restore: 1054
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1054,26004.3,1054000,0,0,0,355.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1055000
  custom_metrics: {}
  date: 2021-10-09_05-38-17
  done: false
  episode_len_mean: 352.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 2939
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.8820332421196833
          entropy_coeff: 0.009999999999999998
          kl: 0.009023315942924161
          policy_loss: -0.09999712540043725
          total_loss: -0.11220676778919167
          vf_explained_var: -0.4133837819099426
          vf_loss: 0.00010656585688290458
    num_agent_steps_sampled: 1055000
    num_agent_steps_trained: 1055000
    num_steps_sampled: 1055000
    num_steps_trained: 1055000
  iterations_since_restore: 1055


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1055,26032.2,1055000,0,0,0,352.82




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1056000
  custom_metrics: {}
  date: 2021-10-09_05-39-02
  done: false
  episode_len_mean: 352.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 2942
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.803846862581041
          entropy_coeff: 0.009999999999999998
          kl: 0.012028737095205412
          policy_loss: 0.016450186860230233
          total_loss: 0.007111480459570885
          vf_explained_var: -0.2726495862007141
          vf_loss: 2.9294392637287576e-05
    num_agent_steps_sampled: 1056000
    num_agent_steps_trained: 1056000
    num_steps_sampled: 1056000
    num_steps_trained: 1056000
  iterations_since_restore: 1056
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1056,26076.6,1056000,0,0,0,352.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1057000
  custom_metrics: {}
  date: 2021-10-09_05-39-25
  done: false
  episode_len_mean: 352.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2944
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.8146441141764322
          entropy_coeff: 0.009999999999999998
          kl: 0.0017495924869076794
          policy_loss: -0.1307014898293548
          total_loss: -0.1475558285911878
          vf_explained_var: -0.4086919128894806
          vf_loss: 3.0972744894420933e-05
    num_agent_steps_sampled: 1057000
    num_agent_steps_trained: 1057000
    num_steps_sampled: 1057000
    num_steps_trained: 1057000
  iterations_since_restore: 1057
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1057,26099.6,1057000,0,0,0,352.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1058000
  custom_metrics: {}
  date: 2021-10-09_05-39-48
  done: false
  episode_len_mean: 352.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2947
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.7805911739667257
          entropy_coeff: 0.009999999999999998
          kl: 0.018379589146114216
          policy_loss: -0.11792046229044596
          total_loss: -0.12907188220156562
          vf_explained_var: -0.31874462962150574
          vf_loss: 3.036511746662048e-05
    num_agent_steps_sampled: 1058000
    num_agent_steps_trained: 1058000
    num_steps_sampled: 1058000
    num_steps_trained: 1058000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1058,26123.1,1058000,-0.02,0,-2,352.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1059000
  custom_metrics: {}
  date: 2021-10-09_05-40-13
  done: false
  episode_len_mean: 350.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2950
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.9530551897154913
          entropy_coeff: 0.009999999999999998
          kl: 0.021149749319459468
          policy_loss: -0.08705599318361945
          total_loss: -0.09895169276537166
          vf_explained_var: -0.4036494493484497
          vf_loss: 1.2343519721172117e-05
    num_agent_steps_sampled: 1059000
    num_agent_steps_trained: 1059000
    num_steps_sampled: 1059000
    num_steps_trained: 1059000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1059,26147.9,1059000,-0.02,0,-2,350.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1060000
  custom_metrics: {}
  date: 2021-10-09_05-40-39
  done: false
  episode_len_mean: 348.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2953
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7079278839959038
          entropy_coeff: 0.009999999999999998
          kl: 0.01108889691196871
          policy_loss: -0.11357655423796839
          total_loss: -0.12464915580219693
          vf_explained_var: 0.003866010345518589
          vf_loss: 1.1912728064089606e-05
    num_agent_steps_sampled: 1060000
    num_agent_steps_trained: 1060000
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1060,26174.1,1060000,-0.02,0,-2,348.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1061000
  custom_metrics: {}
  date: 2021-10-09_05-41-02
  done: false
  episode_len_mean: 348.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2956
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8437586413489448
          entropy_coeff: 0.009999999999999998
          kl: 0.01766452359248893
          policy_loss: -0.15201996475872065
          total_loss: -0.16089120469987392
          vf_explained_var: -0.14955797791481018
          vf_loss: 1.673305672132604e-05
    num_agent_steps_sampled: 1061000
    num_agent_steps_trained: 1061000
    num_steps_sampled: 1061000
    num_steps_trained: 1061000
  iterations_since_restore: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1061,26196.7,1061000,-0.02,0,-2,348.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1062000
  custom_metrics: {}
  date: 2021-10-09_05-41-25
  done: false
  episode_len_mean: 346.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2959
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7108836770057678
          entropy_coeff: 0.009999999999999998
          kl: 0.015554114845522528
          policy_loss: -0.09763933759596613
          total_loss: -0.10632471102807257
          vf_explained_var: -0.5339884161949158
          vf_loss: 1.4758696786682574e-05
    num_agent_steps_sampled: 1062000
    num_agent_steps_trained: 1062000
    num_steps_sampled: 1062000
    num_steps_trained: 1062000
  iterations_since_restore: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1062,26220,1062000,-0.02,0,-2,346.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1063000
  custom_metrics: {}
  date: 2021-10-09_05-41-49
  done: false
  episode_len_mean: 348.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2962
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.981848959128062
          entropy_coeff: 0.009999999999999998
          kl: 0.01627807687114008
          policy_loss: -0.12023670470549
          total_loss: -0.13124226211673684
          vf_explained_var: -0.006333132740110159
          vf_loss: 1.2846404049721766e-05
    num_agent_steps_sampled: 1063000
    num_agent_steps_trained: 1063000
    num_steps_sampled: 1063000
    num_steps_trained: 1063000
  iterations_since_restore: 1063


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1063,26244,1063000,-0.02,0,-2,348.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1064000
  custom_metrics: {}
  date: 2021-10-09_05-42-13
  done: false
  episode_len_mean: 350.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 2964
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.791157195303175
          entropy_coeff: 0.009999999999999998
          kl: 0.016233806047570926
          policy_loss: -0.041537551540467475
          total_loss: -0.0506535537333952
          vf_explained_var: -0.0976252481341362
          vf_loss: 1.9418369402046664e-05
    num_agent_steps_sampled: 1064000
    num_agent_steps_trained: 1064000
    num_steps_sampled: 1064000
    num_steps_trained: 1064000
  iterations_since_restore: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1064,26267.4,1064000,-0.02,0,-2,350.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1065000
  custom_metrics: {}
  date: 2021-10-09_05-42-36
  done: false
  episode_len_mean: 353.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2967
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.939114667309655
          entropy_coeff: 0.009999999999999998
          kl: 0.016676504210473425
          policy_loss: -0.1147990073180861
          total_loss: -0.12516219651119576
          vf_explained_var: 0.015943868085741997
          vf_loss: 1.2480740537689093e-05
    num_agent_steps_sampled: 1065000
    num_agent_steps_trained: 1065000
    num_steps_sampled: 1065000
    num_steps_trained: 1065000
  iterations_since_restore: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1065,26290.5,1065000,-0.02,0,-2,353.56




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1066000
  custom_metrics: {}
  date: 2021-10-09_05-43-14
  done: false
  episode_len_mean: 356.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2970
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8834111187193128
          entropy_coeff: 0.009999999999999998
          kl: 0.014232124898211277
          policy_loss: -0.09493052288889885
          total_loss: -0.10605750199821261
          vf_explained_var: -0.40403202176094055
          vf_loss: 1.3108558000062152e-05
    num_agent_steps_sampled: 1066000
    num_agent_steps_trained: 1066000
    num_steps_sampled: 1066000
    num_steps_trained: 1066000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1066,26328.3,1066000,-0.02,0,-2,356.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1067000
  custom_metrics: {}
  date: 2021-10-09_05-43-38
  done: false
  episode_len_mean: 357.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 2972
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7499397661950853
          entropy_coeff: 0.009999999999999998
          kl: 0.017160005343670688
          policy_loss: -0.08623532694247034
          total_loss: -0.09442256246176031
          vf_explained_var: -0.43899214267730713
          vf_loss: 3.529549673860149e-05
    num_agent_steps_sampled: 1067000
    num_agent_steps_trained: 1067000
    num_steps_sampled: 1067000
    num_steps_trained: 1067000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1067,26352.6,1067000,-0.02,0,-2,357.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1068000
  custom_metrics: {}
  date: 2021-10-09_05-43-59
  done: false
  episode_len_mean: 359.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2975
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.923578561676873
          entropy_coeff: 0.009999999999999998
          kl: 0.01793009108896283
          policy_loss: -0.10688583109941747
          total_loss: -0.11642116113669343
          vf_explained_var: -0.5907202959060669
          vf_loss: 7.277573508367317e-06
    num_agent_steps_sampled: 1068000
    num_agent_steps_trained: 1068000
    num_steps_sampled: 1068000
    num_steps_trained: 1068000
  iterations_since_restore: 1068


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1068,26373.5,1068000,-0.02,0,-2,359.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1069000
  custom_metrics: {}
  date: 2021-10-09_05-44-22
  done: false
  episode_len_mean: 360.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 2977
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.007749127017127
          entropy_coeff: 0.009999999999999998
          kl: 0.009415150141968535
          policy_loss: -0.05382054365343518
          total_loss: -0.06880303564636657
          vf_explained_var: -0.7765623331069946
          vf_loss: 5.078038189923164e-06
    num_agent_steps_sampled: 1069000
    num_agent_steps_trained: 1069000
    num_steps_sampled: 1069000
    num_steps_trained: 1069000
  iterations_since_restore: 1069

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1069,26396.4,1069000,-0.02,0,-2,360.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1070000
  custom_metrics: {}
  date: 2021-10-09_05-44-45
  done: false
  episode_len_mean: 360.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2980
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7745634939935473
          entropy_coeff: 0.009999999999999998
          kl: 0.017463236438294377
          policy_loss: -0.06736289908488592
          total_loss: -0.07564711775630713
          vf_explained_var: -0.0743306577205658
          vf_loss: 2.0619820684765324e-05
    num_agent_steps_sampled: 1070000
    num_agent_steps_trained: 1070000
    num_steps_sampled: 1070000
    num_steps_trained: 1070000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1070,26419.4,1070000,-0.02,0,-2,360.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1071000
  custom_metrics: {}
  date: 2021-10-09_05-45-08
  done: false
  episode_len_mean: 360.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2983
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7851390295558505
          entropy_coeff: 0.009999999999999998
          kl: 0.016926447527651965
          policy_loss: -0.07455614788664712
          total_loss: -0.08324209742455019
          vf_explained_var: -0.4768815040588379
          vf_loss: 1.4840126373706476e-05
    num_agent_steps_sampled: 1071000
    num_agent_steps_trained: 1071000
    num_steps_sampled: 1071000
    num_steps_trained: 1071000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1071,26443.1,1071000,-0.02,0,-2,360.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1072000
  custom_metrics: {}
  date: 2021-10-09_05-45-31
  done: false
  episode_len_mean: 362.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2986
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8576733734872606
          entropy_coeff: 0.009999999999999998
          kl: 0.016102476617180568
          policy_loss: -0.04666146602895525
          total_loss: -0.056522462972336346
          vf_explained_var: -0.13196326792240143
          vf_loss: 1.0581324376188503e-05
    num_agent_steps_sampled: 1072000
    num_agent_steps_trained: 1072000
    num_steps_sampled: 1072000
    num_steps_trained: 1072000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1072,26465.3,1072000,-0.02,0,-2,362.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1073000
  custom_metrics: {}
  date: 2021-10-09_05-45-54
  done: false
  episode_len_mean: 362.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 2988
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9894361787372166
          entropy_coeff: 0.009999999999999998
          kl: 0.01072828583611438
          policy_loss: -0.10172171153955989
          total_loss: -0.11580981525282065
          vf_explained_var: -0.1675640344619751
          vf_loss: 6.441038223985945e-06
    num_agent_steps_sampled: 1073000
    num_agent_steps_trained: 1073000
    num_steps_sampled: 1073000
    num_steps_trained: 1073000
  iterations_since_restore: 1073

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1073,26488.3,1073000,-0.02,0,-2,362.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1074000
  custom_metrics: {}
  date: 2021-10-09_05-46-17
  done: false
  episode_len_mean: 362.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2991
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8710968017578125
          entropy_coeff: 0.009999999999999998
          kl: 0.01287501723116823
          policy_loss: -0.03682849514815542
          total_loss: -0.04857041947543621
          vf_explained_var: -0.5177339911460876
          vf_loss: 8.684001151474756e-06
    num_agent_steps_sampled: 1074000
    num_agent_steps_trained: 1074000
    num_steps_sampled: 1074000
    num_steps_trained: 1074000
  iterations_since_restore: 1074

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1074,26511.2,1074000,-0.02,0,-2,362.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1075000
  custom_metrics: {}
  date: 2021-10-09_05-46-37
  done: false
  episode_len_mean: 363.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 2993
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.792091397444407
          entropy_coeff: 0.009999999999999998
          kl: 0.014608160332622284
          policy_loss: -0.08255603590773211
          total_loss: -0.09257507340775596
          vf_explained_var: -0.3928265869617462
          vf_loss: 4.5619774318260575e-06
    num_agent_steps_sampled: 1075000
    num_agent_steps_trained: 1075000
    num_steps_sampled: 1075000
    num_steps_trained: 1075000
  iterations_since_restore: 107

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1075,26531.5,1075000,-0.02,0,-2,363.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1076000
  custom_metrics: {}
  date: 2021-10-09_05-46-58
  done: false
  episode_len_mean: 365.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 2996
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7855848537551031
          entropy_coeff: 0.009999999999999998
          kl: 0.013021949162544939
          policy_loss: -0.057974758992592496
          total_loss: -0.06877146458460225
          vf_explained_var: -1.0
          vf_loss: 1.934795328553365e-05
    num_agent_steps_sampled: 1076000
    num_agent_steps_trained: 1076000
    num_steps_sampled: 1076000
    num_steps_trained: 1076000
  iterations_since_restore: 1076
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1076,26553,1076000,-0.02,0,-2,365.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1077000
  custom_metrics: {}
  date: 2021-10-09_05-47-19
  done: false
  episode_len_mean: 366.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 2998
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9099069979455736
          entropy_coeff: 0.009999999999999998
          kl: 0.012096776547958763
          policy_loss: -0.020880503652410374
          total_loss: -0.03343060140808423
          vf_explained_var: 0.11050636321306229
          vf_loss: 9.336729888471179e-06
    num_agent_steps_sampled: 1077000
    num_agent_steps_trained: 1077000
    num_steps_sampled: 1077000
    num_steps_trained: 1077000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1077,26573.9,1077000,-0.02,0,-2,366.13




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1078000
  custom_metrics: {}
  date: 2021-10-09_05-47-57
  done: false
  episode_len_mean: 366.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3001
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7208492914835611
          entropy_coeff: 0.009999999999999998
          kl: 0.015348047752200887
          policy_loss: -0.09505191275642978
          total_loss: -0.10385916170974573
          vf_explained_var: -0.9233396053314209
          vf_loss: 0.00010394046342349207
    num_agent_steps_sampled: 1078000
    num_agent_steps_trained: 1078000
    num_steps_sampled: 1078000
    num_steps_trained: 1078000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1078,26611.5,1078000,-0.02,0,-2,366.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1079000
  custom_metrics: {}
  date: 2021-10-09_05-48-21
  done: false
  episode_len_mean: 365.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3004
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.6748573607868618
          entropy_coeff: 0.009999999999999998
          kl: 0.020335708668474693
          policy_loss: -0.10625275977783732
          total_loss: -0.11200204243262608
          vf_explained_var: 0.12038540095090866
          vf_loss: 5.60825408734268e-06
    num_agent_steps_sampled: 1079000
    num_agent_steps_trained: 1079000
    num_steps_sampled: 1079000
    num_steps_trained: 1079000
  iterations_since_restore: 1079

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1079,26635.2,1079000,-0.02,0,-2,365.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1080000
  custom_metrics: {}
  date: 2021-10-09_05-48-44
  done: false
  episode_len_mean: 365.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 3006
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.849893609682719
          entropy_coeff: 0.009999999999999998
          kl: 0.012686326583444402
          policy_loss: -0.07495295430223146
          total_loss: -0.08315978277888562
          vf_explained_var: -0.4017789959907532
          vf_loss: 4.5818970736238245e-06
    num_agent_steps_sampled: 1080000
    num_agent_steps_trained: 1080000
    num_steps_sampled: 1080000
    num_steps_trained: 1080000
  iterations_since_restore: 108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1080,26658.4,1080000,-0.02,0,-2,365.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1081000
  custom_metrics: {}
  date: 2021-10-09_05-49-08
  done: false
  episode_len_mean: 365.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3009
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.4407122400071886
          entropy_coeff: 0.009999999999999998
          kl: 0.014645099144679246
          policy_loss: -0.1076528420050939
          total_loss: -0.10983065161854029
          vf_explained_var: -0.7782443761825562
          vf_loss: 0.00035338663968407976
    num_agent_steps_sampled: 1081000
    num_agent_steps_trained: 1081000
    num_steps_sampled: 1081000
    num_steps_trained: 1081000
  iterations_since_restore: 108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1081,26682.3,1081000,-0.02,0,-2,365.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1082000
  custom_metrics: {}
  date: 2021-10-09_05-49-30
  done: false
  episode_len_mean: 365.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3012
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.8545448448922899
          entropy_coeff: 0.009999999999999998
          kl: 0.012517724309194773
          policy_loss: 0.0002318141981959343
          total_loss: -0.008152919987009632
          vf_explained_var: -0.41698363423347473
          vf_loss: 9.908506457476404e-06
    num_agent_steps_sampled: 1082000
    num_agent_steps_trained: 1082000
    num_steps_sampled: 1082000
    num_steps_trained: 1082000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1082,26704.3,1082000,-0.02,0,-2,365.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1083000
  custom_metrics: {}
  date: 2021-10-09_05-49-54
  done: false
  episode_len_mean: 365.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3015
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9076094932026333
          entropy_coeff: 0.009999999999999998
          kl: 0.012731429073484988
          policy_loss: -0.0640872657402522
          total_loss: -0.07283493403552307
          vf_explained_var: -0.46242666244506836
          vf_loss: 4.3268081678130935e-06
    num_agent_steps_sampled: 1083000
    num_agent_steps_trained: 1083000
    num_steps_sampled: 1083000
    num_steps_trained: 1083000
  iterations_since_restore: 108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1083,26728.3,1083000,-0.02,0,-2,365.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1084000
  custom_metrics: {}
  date: 2021-10-09_05-50-17
  done: false
  episode_len_mean: 365.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 3017
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 2.022392331229316
          entropy_coeff: 0.009999999999999998
          kl: 0.01133126342790907
          policy_loss: -0.029750669519934388
          total_loss: -0.04078362312995725
          vf_explained_var: -0.47432810068130493
          vf_loss: 2.2831929729565873e-06
    num_agent_steps_sampled: 1084000
    num_agent_steps_trained: 1084000
    num_steps_sampled: 1084000
    num_steps_trained: 1084000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1084,26751.8,1084000,-0.02,0,-2,365.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1085000
  custom_metrics: {}
  date: 2021-10-09_05-50-46
  done: false
  episode_len_mean: 363.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 4
  episodes_total: 3021
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6976041184531319
          entropy_coeff: 0.009999999999999998
          kl: 0.012859553076693923
          policy_loss: -0.10307311125927501
          total_loss: -0.1096129501859347
          vf_explained_var: -0.05214184522628784
          vf_loss: 8.203597624540432e-06
    num_agent_steps_sampled: 1085000
    num_agent_steps_trained: 1085000
    num_steps_sampled: 1085000
    num_steps_trained: 1085000
  iterations_since_restore: 108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1085,26779.9,1085000,-0.02,0,-2,363.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1086000
  custom_metrics: {}
  date: 2021-10-09_05-51-10
  done: false
  episode_len_mean: 361.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3024
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9322923925187854
          entropy_coeff: 0.009999999999999998
          kl: 0.011508105172271656
          policy_loss: -0.06760252060161696
          total_loss: -0.07759044996152321
          vf_explained_var: -0.5806922912597656
          vf_loss: 2.904284134628041e-06
    num_agent_steps_sampled: 1086000
    num_agent_steps_trained: 1086000
    num_steps_sampled: 1086000
    num_steps_trained: 1086000
  iterations_since_restore: 108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1086,26804.4,1086000,-0.02,0,-2,361.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1087000
  custom_metrics: {}
  date: 2021-10-09_05-51-34
  done: false
  episode_len_mean: 361.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3027
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.818427324295044
          entropy_coeff: 0.009999999999999998
          kl: 0.011068717363039582
          policy_loss: -0.045706706287132366
          total_loss: -0.05491122139824761
          vf_explained_var: -0.5634397864341736
          vf_loss: 3.975523491640666e-06
    num_agent_steps_sampled: 1087000
    num_agent_steps_trained: 1087000
    num_steps_sampled: 1087000
    num_steps_trained: 1087000
  iterations_since_restore: 108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1087,26828.3,1087000,-0.02,0,-2,361.12




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1088000
  custom_metrics: {}
  date: 2021-10-09_05-52-17
  done: false
  episode_len_mean: 359.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3030
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9675268875228034
          entropy_coeff: 0.009999999999999998
          kl: 0.015265877907066875
          policy_loss: -0.0908486653947168
          total_loss: -0.09814152514768971
          vf_explained_var: -0.4718233346939087
          vf_loss: 3.086830622578418e-06
    num_agent_steps_sampled: 1088000
    num_agent_steps_trained: 1088000
    num_steps_sampled: 1088000
    num_steps_trained: 1088000
  iterations_since_restore: 1088


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1088,26871,1088000,-0.02,0,-2,359.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1089000
  custom_metrics: {}
  date: 2021-10-09_05-52-43
  done: false
  episode_len_mean: 358.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3033
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.767745057741801
          entropy_coeff: 0.009999999999999998
          kl: 0.012586911446921433
          policy_loss: 0.030791438391639128
          total_loss: 0.02332357424828741
          vf_explained_var: -0.5007526874542236
          vf_loss: 2.6750617191990186e-06
    num_agent_steps_sampled: 1089000
    num_agent_steps_trained: 1089000
    num_steps_sampled: 1089000
    num_steps_trained: 1089000
  iterations_since_restore: 1089

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1089,26897.7,1089000,-0.02,0,-2,358.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1090000
  custom_metrics: {}
  date: 2021-10-09_05-53-06
  done: false
  episode_len_mean: 358.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 3035
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9105154739485846
          entropy_coeff: 0.009999999999999998
          kl: 0.009015373519767058
          policy_loss: -0.07084405765765243
          total_loss: -0.0826358188357618
          vf_explained_var: -0.7772660255432129
          vf_loss: 2.6958420322545055e-06
    num_agent_steps_sampled: 1090000
    num_agent_steps_trained: 1090000
    num_steps_sampled: 1090000
    num_steps_trained: 1090000
  iterations_since_restore: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1090,26920.4,1090000,-0.02,0,-2,358.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1091000
  custom_metrics: {}
  date: 2021-10-09_05-53-27
  done: false
  episode_len_mean: 361.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3038
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.721450071864658
          entropy_coeff: 0.009999999999999998
          kl: 0.009991462534527666
          policy_loss: -0.09692989678329064
          total_loss: -0.10601957546960976
          vf_explained_var: -0.34329307079315186
          vf_loss: 2.259952634378553e-05
    num_agent_steps_sampled: 1091000
    num_agent_steps_trained: 1091000
    num_steps_sampled: 1091000
    num_steps_trained: 1091000
  iterations_since_restore: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1091,26941.2,1091000,-0.02,0,-2,361.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1092000
  custom_metrics: {}
  date: 2021-10-09_05-53-48
  done: false
  episode_len_mean: 363.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 3040
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5410634994506835
          entropy_coeff: 0.009999999999999998
          kl: 0.013758671729307646
          policy_loss: -0.03784739569657379
          total_loss: -0.04208672081844674
          vf_explained_var: 0.11296510696411133
          vf_loss: 1.4200476910749178e-05
    num_agent_steps_sampled: 1092000
    num_agent_steps_trained: 1092000
    num_steps_sampled: 1092000
    num_steps_trained: 1092000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1092,26962.3,1092000,-0.02,0,-2,363.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1093000
  custom_metrics: {}
  date: 2021-10-09_05-54-10
  done: false
  episode_len_mean: 365.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 3043
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.7611847586101956
          entropy_coeff: 0.009999999999999998
          kl: 0.015583527126896177
          policy_loss: -0.12075719609856606
          total_loss: -0.12572618280020026
          vf_explained_var: -0.007108732126653194
          vf_loss: 5.953278317368434e-06
    num_agent_steps_sampled: 1093000
    num_agent_steps_trained: 1093000
    num_steps_sampled: 1093000
    num_steps_trained: 1093000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1093,26984.4,1093000,-0.02,0,-2,365.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1094000
  custom_metrics: {}
  date: 2021-10-09_05-54-31
  done: false
  episode_len_mean: 366.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3045
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.702947861618466
          entropy_coeff: 0.009999999999999998
          kl: 0.013548968584497464
          policy_loss: -0.023745081656508977
          total_loss: -0.029783588647842408
          vf_explained_var: 0.4417211711406708
          vf_loss: 3.913415570069547e-06
    num_agent_steps_sampled: 1094000
    num_agent_steps_trained: 1094000
    num_steps_sampled: 1094000
    num_steps_trained: 1094000
  iterations_since_restore: 1094
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1094,27005,1094000,0,0,0,366.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1095000
  custom_metrics: {}
  date: 2021-10-09_05-54-55
  done: false
  episode_len_mean: 366.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3048
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.41661852200826
          entropy_coeff: 0.009999999999999998
          kl: 0.01029924665577183
          policy_loss: -0.007305690190858311
          total_loss: -0.013115059501594967
          vf_explained_var: -0.4544920325279236
          vf_loss: 5.00407230295726e-06
    num_agent_steps_sampled: 1095000
    num_agent_steps_trained: 1095000
    num_steps_sampled: 1095000
    num_steps_trained: 1095000
  iterations_since_restore: 1095
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1095,27028.9,1095000,0,0,0,366.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1096000
  custom_metrics: {}
  date: 2021-10-09_05-55-17
  done: false
  episode_len_mean: 366.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3051
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.750434903303782
          entropy_coeff: 0.009999999999999998
          kl: 0.013602464672273959
          policy_loss: -0.08596106320619583
          total_loss: -0.09243021723296907
          vf_explained_var: -0.632463812828064
          vf_loss: 4.756601116342709e-06
    num_agent_steps_sampled: 1096000
    num_agent_steps_trained: 1096000
    num_steps_sampled: 1096000
    num_steps_trained: 1096000
  iterations_since_restore: 1096
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1096,27051.6,1096000,0,0,0,366.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1097000
  custom_metrics: {}
  date: 2021-10-09_05-55-42
  done: false
  episode_len_mean: 368.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3054
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5476032879617478
          entropy_coeff: 0.009999999999999998
          kl: 0.015838437851837504
          policy_loss: 0.0017139616525835462
          total_loss: -0.0009126899143060049
          vf_explained_var: 0.01420888677239418
          vf_loss: 5.75861411865238e-06
    num_agent_steps_sampled: 1097000
    num_agent_steps_trained: 1097000
    num_steps_sampled: 1097000
    num_steps_trained: 1097000
  iterations_since_restore: 1097

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1097,27075.8,1097000,0,0,0,368.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1098000
  custom_metrics: {}
  date: 2021-10-09_05-56-06
  done: false
  episode_len_mean: 367.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3057
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6747877968682183
          entropy_coeff: 0.009999999999999998
          kl: 0.013855297789364505
          policy_loss: -0.10686377440061834
          total_loss: -0.11236931354635292
          vf_explained_var: 0.041954394429922104
          vf_loss: 6.875912477729192e-06
    num_agent_steps_sampled: 1098000
    num_agent_steps_trained: 1098000
    num_steps_sampled: 1098000
    num_steps_trained: 1098000
  iterations_since_restore: 1098


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1098,27100.5,1098000,0,0,0,367.57




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1099000
  custom_metrics: {}
  date: 2021-10-09_05-56-48
  done: false
  episode_len_mean: 365.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3060
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.576971697807312
          entropy_coeff: 0.009999999999999998
          kl: 0.015251803304618939
          policy_loss: -0.08793488935463958
          total_loss: -0.09133018317321936
          vf_explained_var: -0.03299110755324364
          vf_loss: 6.513288730679455e-06
    num_agent_steps_sampled: 1099000
    num_agent_steps_trained: 1099000
    num_steps_sampled: 1099000
    num_steps_trained: 1099000
  iterations_since_restore: 1099
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1099,27141.8,1099000,0,0,0,365.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1100000
  custom_metrics: {}
  date: 2021-10-09_05-57-12
  done: false
  episode_len_mean: 366.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3062
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6715061651335823
          entropy_coeff: 0.009999999999999998
          kl: 0.011486332920758683
          policy_loss: -0.0630447458061907
          total_loss: -0.0704384124941296
          vf_explained_var: -0.09332633763551712
          vf_loss: 6.959046140764258e-06
    num_agent_steps_sampled: 1100000
    num_agent_steps_trained: 1100000
    num_steps_sampled: 1100000
    num_steps_trained: 1100000
  iterations_since_restore: 1100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1100,27165.9,1100000,0,0,0,366.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1101000
  custom_metrics: {}
  date: 2021-10-09_05-57-33
  done: false
  episode_len_mean: 367.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3065
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5941780302259656
          entropy_coeff: 0.009999999999999998
          kl: 0.013459335918720821
          policy_loss: -0.09036280219960544
          total_loss: -0.09521516099986103
          vf_explained_var: -0.48715874552726746
          vf_loss: 0.0001750463510613069
    num_agent_steps_sampled: 1101000
    num_agent_steps_trained: 1101000
    num_steps_sampled: 1101000
    num_steps_trained: 1101000
  iterations_since_restore: 1101


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1101,27187.2,1101000,0,0,0,367.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1102000
  custom_metrics: {}
  date: 2021-10-09_05-57-56
  done: false
  episode_len_mean: 367.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3068
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5060513483153448
          entropy_coeff: 0.009999999999999998
          kl: 0.01626863897333553
          policy_loss: -0.052780282745758696
          total_loss: -0.054642581442991896
          vf_explained_var: 0.12918558716773987
          vf_loss: 5.739184254909762e-06
    num_agent_steps_sampled: 1102000
    num_agent_steps_trained: 1102000
    num_steps_sampled: 1102000
    num_steps_trained: 1102000
  iterations_since_restore: 1102


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1102,27210.2,1102000,0,0,0,367.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1103000
  custom_metrics: {}
  date: 2021-10-09_05-58-21
  done: false
  episode_len_mean: 367.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3070
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.8053759495417276
          entropy_coeff: 0.009999999999999998
          kl: 0.013206612531975912
          policy_loss: -0.10060350925972064
          total_loss: -0.10794347723325094
          vf_explained_var: -0.4465779662132263
          vf_loss: 4.353864473058719e-06
    num_agent_steps_sampled: 1103000
    num_agent_steps_trained: 1103000
    num_steps_sampled: 1103000
    num_steps_trained: 1103000
  iterations_since_restore: 1103
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1103,27234.8,1103000,0,0,0,367.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1104000
  custom_metrics: {}
  date: 2021-10-09_05-58-44
  done: false
  episode_len_mean: 366.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3073
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9606775297058954
          entropy_coeff: 0.009999999999999998
          kl: 0.012670657141714706
          policy_loss: -0.08175941718638771
          total_loss: -0.09108743880771929
          vf_explained_var: -0.572583794593811
          vf_loss: 3.93138269474649e-06
    num_agent_steps_sampled: 1104000
    num_agent_steps_trained: 1104000
    num_steps_sampled: 1104000
    num_steps_trained: 1104000
  iterations_since_restore: 1104
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1104,27258.4,1104000,0,0,0,366.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1105000
  custom_metrics: {}
  date: 2021-10-09_05-59-09
  done: false
  episode_len_mean: 365.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3076
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5953189863098993
          entropy_coeff: 0.009999999999999998
          kl: 0.009565558713347558
          policy_loss: -0.1149262055547701
          total_loss: -0.12311879578563902
          vf_explained_var: -0.6101011633872986
          vf_loss: 3.7493125306456125e-06
    num_agent_steps_sampled: 1105000
    num_agent_steps_trained: 1105000
    num_steps_sampled: 1105000
    num_steps_trained: 1105000
  iterations_since_restore: 1105
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1105,27282.6,1105000,0,0,0,365.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1106000
  custom_metrics: {}
  date: 2021-10-09_05-59-30
  done: false
  episode_len_mean: 365.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3078
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6592861930529277
          entropy_coeff: 0.009999999999999998
          kl: 0.0044642889464027585
          policy_loss: -0.32572400205665164
          total_loss: -0.3386938489145703
          vf_explained_var: 0.03426332026720047
          vf_loss: 2.8563268150113194e-06
    num_agent_steps_sampled: 1106000
    num_agent_steps_trained: 1106000
    num_steps_sampled: 1106000
    num_steps_trained: 1106000
  iterations_since_restore: 1106


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1106,27304.1,1106000,0,0,0,365.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1107000
  custom_metrics: {}
  date: 2021-10-09_05-59-53
  done: false
  episode_len_mean: 365.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3081
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.544049400753445
          entropy_coeff: 0.009999999999999998
          kl: 0.012519484889342403
          policy_loss: 0.014721376531653934
          total_loss: 0.04330652753512065
          vf_explained_var: -0.39639389514923096
          vf_loss: 0.03894953087929025
    num_agent_steps_sampled: 1107000
    num_agent_steps_trained: 1107000
    num_steps_sampled: 1107000
    num_steps_trained: 1107000
  iterations_since_restore: 1107


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1107,27326.8,1107000,-0.12,0,-7,365.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1108000
  custom_metrics: {}
  date: 2021-10-09_06-00-13
  done: false
  episode_len_mean: 367.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 3083
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.870048667324914
          entropy_coeff: 0.009999999999999998
          kl: 0.02129781811766245
          policy_loss: -0.12116560394772224
          total_loss: -0.12629844820540811
          vf_explained_var: -0.12756046652793884
          vf_loss: 0.004932283708411787
    num_agent_steps_sampled: 1108000
    num_agent_steps_trained: 1108000
    num_steps_sampled: 1108000
    num_steps_trained: 1108000
  iterations_since_restore: 1108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1108,27346.8,1108000,-0.12,0,-7,367.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1109000
  custom_metrics: {}
  date: 2021-10-09_06-00-37
  done: false
  episode_len_mean: 367.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3086
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5060074951913622
          entropy_coeff: 0.009999999999999998
          kl: 0.013289680906735728
          policy_loss: -0.07421222151153617
          total_loss: -0.07847146565715472
          vf_explained_var: -0.17853349447250366
          vf_loss: 0.002718236583880045
    num_agent_steps_sampled: 1109000
    num_agent_steps_trained: 1109000
    num_steps_sampled: 1109000
    num_steps_trained: 1109000
  iterations_since_restore: 110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1109,27371,1109000,-0.12,0,-7,367.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1110000
  custom_metrics: {}
  date: 2021-10-09_06-01-02
  done: false
  episode_len_mean: 366.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3089
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.45774134794871
          entropy_coeff: 0.009999999999999998
          kl: 0.011171446411670397
          policy_loss: -0.11394904508358902
          total_loss: -0.11887002223067814
          vf_explained_var: 0.035849787294864655
          vf_loss: 0.0028621202315359064
    num_agent_steps_sampled: 1110000
    num_agent_steps_trained: 1110000
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
  iterations_since_restore: 1110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1110,27396,1110000,-0.12,0,-7,366.28




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1111000
  custom_metrics: {}
  date: 2021-10-09_06-01-42
  done: false
  episode_len_mean: 366.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3092
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.868029867278205
          entropy_coeff: 0.009999999999999998
          kl: 0.01592402539518246
          policy_loss: -0.1383906969593631
          total_loss: -0.14575571277075344
          vf_explained_var: -0.5754544734954834
          vf_loss: 0.0016305143233492142
    num_agent_steps_sampled: 1111000
    num_agent_steps_trained: 1111000
    num_steps_sampled: 1111000
    num_steps_trained: 1111000
  iterations_since_restore: 1111
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1111,27436.1,1111000,-0.12,0,-7,366.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1112000
  custom_metrics: {}
  date: 2021-10-09_06-02-07
  done: false
  episode_len_mean: 364.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3095
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.54325633181466
          entropy_coeff: 0.009999999999999998
          kl: 0.01543744992095226
          policy_loss: -0.11840429397092925
          total_loss: -0.1232480537560251
          vf_explained_var: 0.10494234412908554
          vf_loss: 0.001199963816260505
    num_agent_steps_sampled: 1112000
    num_agent_steps_trained: 1112000
    num_steps_sampled: 1112000
    num_steps_trained: 1112000
  iterations_since_restore: 1112
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1112,27461.1,1112000,-0.12,0,-7,364.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1113000
  custom_metrics: {}
  date: 2021-10-09_06-02-33
  done: false
  episode_len_mean: 361.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3098
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5245724572075738
          entropy_coeff: 0.009999999999999998
          kl: 0.013733266250986
          policy_loss: -0.06749651073995563
          total_loss: -0.07295463519791762
          vf_explained_var: -0.47680553793907166
          vf_loss: 0.0014352205209434033
    num_agent_steps_sampled: 1113000
    num_agent_steps_trained: 1113000
    num_steps_sampled: 1113000
    num_steps_trained: 1113000
  iterations_since_restore: 1113


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1113,27486.9,1113000,-0.12,0,-7,361.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1114000
  custom_metrics: {}
  date: 2021-10-09_06-02-56
  done: false
  episode_len_mean: 360.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3101
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8489792267481486
          entropy_coeff: 0.009999999999999998
          kl: 0.014604066405166078
          policy_loss: -0.04272962520933814
          total_loss: -0.0514022094094091
          vf_explained_var: -0.43164190649986267
          vf_loss: 0.0009352207804719607
    num_agent_steps_sampled: 1114000
    num_agent_steps_trained: 1114000
    num_steps_sampled: 1114000
    num_steps_trained: 1114000
  iterations_since_restore: 111

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1114,27509.4,1114000,-0.12,0,-7,360.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1115000
  custom_metrics: {}
  date: 2021-10-09_06-03-21
  done: false
  episode_len_mean: 359.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3104
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7541408936182659
          entropy_coeff: 0.009999999999999998
          kl: 0.017379434027209387
          policy_loss: -0.12161760895202557
          total_loss: -0.12739731228599946
          vf_explained_var: 0.05365324392914772
          vf_loss: 0.0011917769355932251
    num_agent_steps_sampled: 1115000
    num_agent_steps_trained: 1115000
    num_steps_sampled: 1115000
    num_steps_trained: 1115000
  iterations_since_restore: 111

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1115,27535.2,1115000,-0.12,0,-7,359.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1116000
  custom_metrics: {}
  date: 2021-10-09_06-03-43
  done: false
  episode_len_mean: 360.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 3106
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.725280233224233
          entropy_coeff: 0.009999999999999998
          kl: 0.012108599888751698
          policy_loss: -0.10808278965867228
          total_loss: -0.11739411463754044
          vf_explained_var: -0.5086798071861267
          vf_loss: 0.0005771965774733366
    num_agent_steps_sampled: 1116000
    num_agent_steps_trained: 1116000
    num_steps_sampled: 1116000
    num_steps_trained: 1116000
  iterations_since_restore: 1116

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1116,27556.7,1116000,-0.12,0,-7,360.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1117000
  custom_metrics: {}
  date: 2021-10-09_06-04-06
  done: false
  episode_len_mean: 360.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3109
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.760961902141571
          entropy_coeff: 0.009999999999999998
          kl: 0.016660154676544654
          policy_loss: -0.05996850240561697
          total_loss: -0.06693234766523043
          vf_explained_var: -0.5869529843330383
          vf_loss: 0.0005133005218037094
    num_agent_steps_sampled: 1117000
    num_agent_steps_trained: 1117000
    num_steps_sampled: 1117000
    num_steps_trained: 1117000
  iterations_since_restore: 1117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1117,27579.6,1117000,-0.12,0,-7,360.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1118000
  custom_metrics: {}
  date: 2021-10-09_06-04-29
  done: false
  episode_len_mean: 360.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3112
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8167169716623095
          entropy_coeff: 0.009999999999999998
          kl: 0.01546348718325095
          policy_loss: -0.08369479090389279
          total_loss: -0.09194593694474962
          vf_explained_var: -0.6746959090232849
          vf_loss: 0.0005113494194423159
    num_agent_steps_sampled: 1118000
    num_agent_steps_trained: 1118000
    num_steps_sampled: 1118000
    num_steps_trained: 1118000
  iterations_since_restore: 1118

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1118,27602.4,1118000,-0.12,0,-7,360.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1119000
  custom_metrics: {}
  date: 2021-10-09_06-04-53
  done: false
  episode_len_mean: 360.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3115
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.710813127623664
          entropy_coeff: 0.009999999999999998
          kl: 0.013395056056166312
          policy_loss: -0.022020397138678365
          total_loss: -0.030635643377900125
          vf_explained_var: -0.7944431900978088
          vf_loss: 0.00034619786327109775
    num_agent_steps_sampled: 1119000
    num_agent_steps_trained: 1119000
    num_steps_sampled: 1119000
    num_steps_trained: 1119000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1119,27626.7,1119000,-0.12,0,-7,360.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1120000
  custom_metrics: {}
  date: 2021-10-09_06-05-15
  done: false
  episode_len_mean: 360.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 3117
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7895379066467285
          entropy_coeff: 0.009999999999999998
          kl: 0.015639013161040743
          policy_loss: -0.07002843022346497
          total_loss: -0.07796760060720974
          vf_explained_var: -0.9020333886146545
          vf_loss: 0.0004447834383528162
    num_agent_steps_sampled: 1120000
    num_agent_steps_trained: 1120000
    num_steps_sampled: 1120000
    num_steps_trained: 1120000
  iterations_since_restore: 112

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1120,27649,1120000,-0.12,0,-7,360.59




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1121000
  custom_metrics: {}
  date: 2021-10-09_06-05-56
  done: false
  episode_len_mean: 362.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3120
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5824905488226149
          entropy_coeff: 0.009999999999999998
          kl: 0.014159705138648677
          policy_loss: -0.06933513391349051
          total_loss: -0.07631676304671499
          vf_explained_var: -0.4839622974395752
          vf_loss: 0.00023154206646722742
    num_agent_steps_sampled: 1121000
    num_agent_steps_trained: 1121000
    num_steps_sampled: 1121000
    num_steps_trained: 1121000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1121,27689.3,1121000,-0.12,0,-7,362.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1122000
  custom_metrics: {}
  date: 2021-10-09_06-06-21
  done: false
  episode_len_mean: 362.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3123
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7412344733874003
          entropy_coeff: 0.009999999999999998
          kl: 0.016500482649349144
          policy_loss: -0.06057953851090537
          total_loss: -0.06758703025471834
          vf_explained_var: -0.6748751997947693
          vf_loss: 0.0003694894920853484
    num_agent_steps_sampled: 1122000
    num_agent_steps_trained: 1122000
    num_steps_sampled: 1122000
    num_steps_trained: 1122000
  iterations_since_restore: 112

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1122,27714.3,1122000,-0.12,0,-7,362.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1123000
  custom_metrics: {}
  date: 2021-10-09_06-06-44
  done: false
  episode_len_mean: 362.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3126
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6777731166945564
          entropy_coeff: 0.009999999999999998
          kl: 0.013471122701376853
          policy_loss: -0.06403088068796528
          total_loss: -0.07229035132461124
          vf_explained_var: -0.8822152614593506
          vf_loss: 0.0003253121766546327
    num_agent_steps_sampled: 1123000
    num_agent_steps_trained: 1123000
    num_steps_sampled: 1123000
    num_steps_trained: 1123000
  iterations_since_restore: 112

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1123,27737.3,1123000,-0.12,0,-7,362.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1124000
  custom_metrics: {}
  date: 2021-10-09_06-07-10
  done: false
  episode_len_mean: 361.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3129
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.497163520918952
          entropy_coeff: 0.009999999999999998
          kl: 0.013812130961411226
          policy_loss: -0.06368520764840974
          total_loss: -0.07005248707201746
          vf_explained_var: -0.6918473243713379
          vf_loss: 0.0002040100731796378
    num_agent_steps_sampled: 1124000
    num_agent_steps_trained: 1124000
    num_steps_sampled: 1124000
    num_steps_trained: 1124000
  iterations_since_restore: 1124

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1124,27763.8,1124000,-0.12,0,-7,361.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1125000
  custom_metrics: {}
  date: 2021-10-09_06-07-37
  done: false
  episode_len_mean: 363.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3132
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7555876321262784
          entropy_coeff: 0.009999999999999998
          kl: 0.01440422002840247
          policy_loss: -0.09798787472148736
          total_loss: -0.10657717755271329
          vf_explained_var: -0.8063113689422607
          vf_loss: 0.00020612594938332525
    num_agent_steps_sampled: 1125000
    num_agent_steps_trained: 1125000
    num_steps_sampled: 1125000
    num_steps_trained: 1125000
  iterations_since_restore: 1125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1125,27790.8,1125000,-0.12,0,-7,363


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1126000
  custom_metrics: {}
  date: 2021-10-09_06-08-04
  done: false
  episode_len_mean: 359.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 3136
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3998695466253492
          entropy_coeff: 0.009999999999999998
          kl: 0.024403503244396555
          policy_loss: -0.07389369068874253
          total_loss: -0.07283439189195633
          vf_explained_var: -0.20786304771900177
          vf_loss: 0.00021612735145026818
    num_agent_steps_sampled: 1126000
    num_agent_steps_trained: 1126000
    num_steps_sampled: 1126000
    num_steps_trained: 1126000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1126,27817.6,1126000,-0.12,0,-7,359.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1127000
  custom_metrics: {}
  date: 2021-10-09_06-08-27
  done: false
  episode_len_mean: 358.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 3138
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7551083856158787
          entropy_coeff: 0.009999999999999998
          kl: 0.010110652478700289
          policy_loss: -0.09091910653644138
          total_loss: -0.09903354578548007
          vf_explained_var: -1.0
          vf_loss: 0.0002129068933653697
    num_agent_steps_sampled: 1127000
    num_agent_steps_trained: 1127000
    num_steps_sampled: 1127000
    num_steps_trained: 1127000
  iterations_since_restore: 1127
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1127,27840.6,1127000,-0.12,0,-7,358.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1128000
  custom_metrics: {}
  date: 2021-10-09_06-08-50
  done: false
  episode_len_mean: 357.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3141
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8569034748607212
          entropy_coeff: 0.009999999999999998
          kl: 0.014119828501493735
          policy_loss: -0.07350036174886757
          total_loss: -0.07901974933014975
          vf_explained_var: -0.6386109590530396
          vf_loss: 0.00016842819483523877
    num_agent_steps_sampled: 1128000
    num_agent_steps_trained: 1128000
    num_steps_sampled: 1128000
    num_steps_trained: 1128000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1128,27863.4,1128000,-0.12,0,-7,357.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1129000
  custom_metrics: {}
  date: 2021-10-09_06-09-15
  done: false
  episode_len_mean: 355.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3144
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.5878819823265076
          entropy_coeff: 0.009999999999999998
          kl: 0.010749982310620451
          policy_loss: -0.07160479575395584
          total_loss: -0.07755970909363694
          vf_explained_var: -0.49498817324638367
          vf_loss: 0.00011692302796291187
    num_agent_steps_sampled: 1129000
    num_agent_steps_trained: 1129000
    num_steps_sampled: 1129000
    num_steps_trained: 1129000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1129,27888.3,1129000,-0.12,0,-7,355.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1130000
  custom_metrics: {}
  date: 2021-10-09_06-09-41
  done: false
  episode_len_mean: 354.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3147
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.3773996313412984
          entropy_coeff: 0.009999999999999998
          kl: 0.010653925301079031
          policy_loss: -0.06235110726621416
          total_loss: -0.06629687671860059
          vf_explained_var: -0.1312686949968338
          vf_loss: 0.00010887407091407416
    num_agent_steps_sampled: 1130000
    num_agent_steps_trained: 1130000
    num_steps_sampled: 1130000
    num_steps_trained: 1130000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1130,27914.5,1130000,-0.12,0,-7,354.42




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1131000
  custom_metrics: {}
  date: 2021-10-09_06-10-22
  done: false
  episode_len_mean: 353.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3150
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.4701485726568433
          entropy_coeff: 0.009999999999999998
          kl: 0.014605746023116081
          policy_loss: -0.09112885379128986
          total_loss: -0.09227893402179083
          vf_explained_var: -0.6662238240242004
          vf_loss: 0.0002268924456681513
    num_agent_steps_sampled: 1131000
    num_agent_steps_trained: 1131000
    num_steps_sampled: 1131000
    num_steps_trained: 1131000
  iterations_since_restore: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1131,27955.1,1131000,-0.12,0,-7,353.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1132000
  custom_metrics: {}
  date: 2021-10-09_06-10-46
  done: false
  episode_len_mean: 352.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3153
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6758244064119128
          entropy_coeff: 0.009999999999999998
          kl: 0.010377768237127011
          policy_loss: -0.0641423601243231
          total_loss: -0.07128943143826393
          vf_explained_var: -0.34726330637931824
          vf_loss: 0.00014375055147360804
    num_agent_steps_sampled: 1132000
    num_agent_steps_trained: 1132000
    num_steps_sampled: 1132000
    num_steps_trained: 1132000
  iterations_since_restore: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1132,27979.6,1132000,-0.12,0,-7,352.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1133000
  custom_metrics: {}
  date: 2021-10-09_06-11-10
  done: false
  episode_len_mean: 352.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 3156
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.5283839424451193
          entropy_coeff: 0.009999999999999998
          kl: 0.010228405608207147
          policy_loss: -0.07762116154448853
          total_loss: -0.08348935879766942
          vf_explained_var: -0.8851690292358398
          vf_loss: 8.448299219404968e-05
    num_agent_steps_sampled: 1133000
    num_agent_steps_trained: 1133000
    num_steps_sampled: 1133000
    num_steps_trained: 1133000
  iterations_since_restore: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1133,28003.5,1133000,-0.12,0,-7,352.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1134000
  custom_metrics: {}
  date: 2021-10-09_06-11-33
  done: false
  episode_len_mean: 353.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 3158
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6843449857499864
          entropy_coeff: 0.009999999999999998
          kl: 0.009789044809722923
          policy_loss: -0.08117329043646654
          total_loss: -0.08901008872522248
          vf_explained_var: 0.4117630422115326
          vf_loss: 7.631203144329549e-05
    num_agent_steps_sampled: 1134000
    num_agent_steps_trained: 1134000
    num_steps_sampled: 1134000
    num_steps_trained: 1134000
  iterations_since_restore: 1134

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1134,28026.2,1134000,-0.12,0,-7,353.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1135000
  custom_metrics: {}
  date: 2021-10-09_06-11-59
  done: false
  episode_len_mean: 352.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 3162
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.670987147755093
          entropy_coeff: 0.009999999999999998
          kl: 0.011608120907698015
          policy_loss: -0.04264086021317376
          total_loss: -0.048680796143081455
          vf_explained_var: 0.13120169937610626
          vf_loss: 8.008991135284304e-05
    num_agent_steps_sampled: 1135000
    num_agent_steps_trained: 1135000
    num_steps_sampled: 1135000
    num_steps_trained: 1135000
  iterations_since_restore: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1135,28052.7,1135000,-0.12,0,-7,352.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1136000
  custom_metrics: {}
  date: 2021-10-09_06-12-20
  done: false
  episode_len_mean: 354.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3164
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6200518210728962
          entropy_coeff: 0.009999999999999998
          kl: 0.011839264625503314
          policy_loss: -0.0028675438629256356
          total_loss: 0.24161749730507534
          vf_explained_var: -0.48538780212402344
          vf_loss: 0.24988484454095466
    num_agent_steps_sampled: 1136000
    num_agent_steps_trained: 1136000
    num_steps_sampled: 1136000
    num_steps_trained: 1136000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1136,28073.1,1136000,-0.22,0,-10,354.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1137000
  custom_metrics: {}
  date: 2021-10-09_06-12-40
  done: false
  episode_len_mean: 353.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3166
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6697530719969007
          entropy_coeff: 0.009999999999999998
          kl: 0.012103256158185922
          policy_loss: -0.039376366635163626
          total_loss: -0.035328209979666604
          vf_explained_var: -0.2757580876350403
          vf_loss: 0.009704142524343397
    num_agent_steps_sampled: 1137000
    num_agent_steps_trained: 1137000
    num_steps_sampled: 1137000
    num_steps_trained: 1137000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1137,28093.5,1137000,-0.22,0,-10,353.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1138000
  custom_metrics: {}
  date: 2021-10-09_06-13-05
  done: false
  episode_len_mean: 353.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3169
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6311955677138434
          entropy_coeff: 0.009999999999999998
          kl: 0.011168472317254046
          policy_loss: -0.10676484819915559
          total_loss: -0.1065383215331369
          vf_explained_var: -0.8044672012329102
          vf_loss: 0.006349722114909026
    num_agent_steps_sampled: 1138000
    num_agent_steps_trained: 1138000
    num_steps_sampled: 1138000
    num_steps_trained: 1138000
  iterations_since_restore: 1138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1138,28118,1138000,-0.22,0,-10,353.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1139000
  custom_metrics: {}
  date: 2021-10-09_06-13-28
  done: false
  episode_len_mean: 353.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3172
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8497042218844095
          entropy_coeff: 0.009999999999999998
          kl: 0.013087742107848636
          policy_loss: -0.04137872194664346
          total_loss: -0.042591969586080976
          vf_explained_var: -0.3310545086860657
          vf_loss: 0.005344122552519871
    num_agent_steps_sampled: 1139000
    num_agent_steps_trained: 1139000
    num_steps_sampled: 1139000
    num_steps_trained: 1139000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1139,28141.5,1139000,-0.22,0,-10,353.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1140000
  custom_metrics: {}
  date: 2021-10-09_06-13-53
  done: false
  episode_len_mean: 353.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3175
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.497880498568217
          entropy_coeff: 0.009999999999999998
          kl: 0.013534345528253293
          policy_loss: -0.08242593049589131
          total_loss: -0.08065125147501627
          vf_explained_var: 0.31960952281951904
          vf_loss: 0.00440638503835847
    num_agent_steps_sampled: 1140000
    num_agent_steps_trained: 1140000
    num_steps_sampled: 1140000
    num_steps_trained: 1140000
  iterations_since_restore: 1140


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1140,28166,1140000,-0.22,0,-10,353.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1141000
  custom_metrics: {}
  date: 2021-10-09_06-14-17
  done: false
  episode_len_mean: 352.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3178
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7637485411432055
          entropy_coeff: 0.009999999999999998
          kl: 0.01025701850855714
          policy_loss: -0.09461782934765021
          total_loss: -0.08766652614706093
          vf_explained_var: -0.03864249214529991
          vf_loss: 0.015231524001703495
    num_agent_steps_sampled: 1141000
    num_agent_steps_trained: 1141000
    num_steps_sampled: 1141000
    num_steps_trained: 1141000
  iterations_since_restore: 114

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1141,28190.2,1141000,-0.22,0,-10,352.01




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1142000
  custom_metrics: {}
  date: 2021-10-09_06-14-55
  done: false
  episode_len_mean: 352.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3181
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.5151206546359592
          entropy_coeff: 0.009999999999999998
          kl: 0.015450065718955368
          policy_loss: -0.12971041924837562
          total_loss: -0.12669291597687535
          vf_explained_var: -0.4175901710987091
          vf_loss: 0.004073937508251725
    num_agent_steps_sampled: 1142000
    num_agent_steps_trained: 1142000
    num_steps_sampled: 1142000
    num_steps_trained: 1142000
  iterations_since_restore: 1142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1142,28228,1142000,-0.1,0,-10,352.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1143000
  custom_metrics: {}
  date: 2021-10-09_06-15-18
  done: false
  episode_len_mean: 350.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3183
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8344150529967413
          entropy_coeff: 0.009999999999999998
          kl: 0.011897170755498267
          policy_loss: -0.11163727353430457
          total_loss: -0.11635783225711849
          vf_explained_var: -0.9486403465270996
          vf_loss: 0.002770050934567634
    num_agent_steps_sampled: 1143000
    num_agent_steps_trained: 1143000
    num_steps_sampled: 1143000
    num_steps_trained: 1143000
  iterations_since_restore: 1143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1143,28251.5,1143000,-0.1,0,-10,350.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1144000
  custom_metrics: {}
  date: 2021-10-09_06-15-47
  done: false
  episode_len_mean: 347.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 4
  episodes_total: 3187
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.0110397305753496
          entropy_coeff: 0.009999999999999998
          kl: 0.006239011572460029
          policy_loss: -0.008459593024518755
          total_loss: -0.01206772161854638
          vf_explained_var: 0.20769450068473816
          vf_loss: 0.0008105490861150126
    num_agent_steps_sampled: 1144000
    num_agent_steps_trained: 1144000
    num_steps_sampled: 1144000
    num_steps_trained: 1144000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1144,28280.6,1144000,-0.1,0,-10,347.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1145000
  custom_metrics: {}
  date: 2021-10-09_06-16-11
  done: false
  episode_len_mean: 348.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3190
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6372245377964443
          entropy_coeff: 0.009999999999999998
          kl: 0.011730979965652151
          policy_loss: -0.1087762915632791
          total_loss: -0.11237058552602927
          vf_explained_var: -0.9892969727516174
          vf_loss: 0.002076023813181867
    num_agent_steps_sampled: 1145000
    num_agent_steps_trained: 1145000
    num_steps_sampled: 1145000
    num_steps_trained: 1145000
  iterations_since_restore: 1145


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1145,28304.6,1145000,-0.1,0,-10,348.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1146000
  custom_metrics: {}
  date: 2021-10-09_06-16-36
  done: false
  episode_len_mean: 347.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3193
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.801073635949029
          entropy_coeff: 0.009999999999999998
          kl: 0.013543862161756317
          policy_loss: -0.05152778062555525
          total_loss: -0.055413989681336616
          vf_explained_var: -0.7344861626625061
          vf_loss: 0.0017687469048218594
    num_agent_steps_sampled: 1146000
    num_agent_steps_trained: 1146000
    num_steps_sampled: 1146000
    num_steps_trained: 1146000
  iterations_since_restore: 114

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1146,28328.8,1146000,-0.1,0,-10,347.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1147000
  custom_metrics: {}
  date: 2021-10-09_06-16-57
  done: false
  episode_len_mean: 349.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3195
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.801634669303894
          entropy_coeff: 0.009999999999999998
          kl: 0.01058009021067338
          policy_loss: -0.1852601419720385
          total_loss: -0.19157649990585116
          vf_explained_var: -0.7031810283660889
          vf_loss: 0.0020479965385877424
    num_agent_steps_sampled: 1147000
    num_agent_steps_trained: 1147000
    num_steps_sampled: 1147000
    num_steps_trained: 1147000
  iterations_since_restore: 1147
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1147,28349.6,1147000,-0.1,0,-10,349.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1148000
  custom_metrics: {}
  date: 2021-10-09_06-17-21
  done: false
  episode_len_mean: 349.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3198
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.3393930792808533
          entropy_coeff: 0.009999999999999998
          kl: 0.013476869768841127
          policy_loss: -0.13004487496283318
          total_loss: -0.128207665681839
          vf_explained_var: 0.3400212526321411
          vf_loss: 0.002936472928073878
    num_agent_steps_sampled: 1148000
    num_agent_steps_trained: 1148000
    num_steps_sampled: 1148000
    num_steps_trained: 1148000
  iterations_since_restore: 1148
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1148,28374.4,1148000,-0.1,0,-10,349.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1149000
  custom_metrics: {}
  date: 2021-10-09_06-17-45
  done: false
  episode_len_mean: 348.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3201
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8454397506184048
          entropy_coeff: 0.009999999999999998
          kl: 0.011989802311775499
          policy_loss: -0.02007604328294595
          total_loss: -0.02633183958629767
          vf_explained_var: -0.6272268295288086
          vf_loss: 0.001260559036422314
    num_agent_steps_sampled: 1149000
    num_agent_steps_trained: 1149000
    num_steps_sampled: 1149000
    num_steps_trained: 1149000
  iterations_since_restore: 1149

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1149,28397.9,1149000,-0.1,0,-10,348.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1150000
  custom_metrics: {}
  date: 2021-10-09_06-18-08
  done: false
  episode_len_mean: 349.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3204
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6446887453397114
          entropy_coeff: 0.009999999999999998
          kl: 0.012905187444218551
          policy_loss: -0.0304878747711579
          total_loss: -0.03409817905889617
          vf_explained_var: -0.5168319940567017
          vf_loss: 0.0010634529372004586
    num_agent_steps_sampled: 1150000
    num_agent_steps_trained: 1150000
    num_steps_sampled: 1150000
    num_steps_trained: 1150000
  iterations_since_restore: 1150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1150,28421.1,1150000,-0.1,0,-10,349.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1151000
  custom_metrics: {}
  date: 2021-10-09_06-18-30
  done: false
  episode_len_mean: 349.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3206
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.744281029701233
          entropy_coeff: 0.009999999999999998
          kl: 0.013244028255945157
          policy_loss: -0.05991069324728515
          total_loss: -0.06403975724760029
          vf_explained_var: -0.8063071966171265
          vf_loss: 0.0012315009416650152
    num_agent_steps_sampled: 1151000
    num_agent_steps_trained: 1151000
    num_steps_sampled: 1151000
    num_steps_trained: 1151000
  iterations_since_restore: 1151

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1151,28443,1151000,-0.1,0,-10,349.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1152000
  custom_metrics: {}
  date: 2021-10-09_06-18-50
  done: false
  episode_len_mean: 350.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3209
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8573298626475865
          entropy_coeff: 0.009999999999999998
          kl: 0.012534800941655938
          policy_loss: -0.0618908548520671
          total_loss: -0.06773070217006737
          vf_explained_var: -0.6980290412902832
          vf_loss: 0.0012982143296135797
    num_agent_steps_sampled: 1152000
    num_agent_steps_trained: 1152000
    num_steps_sampled: 1152000
    num_steps_trained: 1152000
  iterations_since_restore: 1152

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1152,28463.4,1152000,-0.1,0,-10,350.97




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1153000
  custom_metrics: {}
  date: 2021-10-09_06-19-31
  done: false
  episode_len_mean: 350.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3211
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.774169339074029
          entropy_coeff: 0.009999999999999998
          kl: 0.01031906743525176
          policy_loss: -0.05710420923100577
          total_loss: -0.06465569372392363
          vf_explained_var: -0.7947360277175903
          vf_loss: 0.0007763399018181695
    num_agent_steps_sampled: 1153000
    num_agent_steps_trained: 1153000
    num_steps_sampled: 1153000
    num_steps_trained: 1153000
  iterations_since_restore: 1153


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1153,28503.6,1153000,-0.1,0,-10,350.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1154000
  custom_metrics: {}
  date: 2021-10-09_06-19-54
  done: false
  episode_len_mean: 352.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3214
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7417078534762065
          entropy_coeff: 0.009999999999999998
          kl: 0.011996314232232979
          policy_loss: -0.07049500457942486
          total_loss: -0.07550110529280371
          vf_explained_var: -0.78575199842453
          vf_loss: 0.0014669901814260004
    num_agent_steps_sampled: 1154000
    num_agent_steps_trained: 1154000
    num_steps_sampled: 1154000
    num_steps_trained: 1154000
  iterations_since_restore: 1154


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1154,28527.2,1154000,-0.1,0,-10,352.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1155000
  custom_metrics: {}
  date: 2021-10-09_06-20-18
  done: false
  episode_len_mean: 350.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3217
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7766378866301642
          entropy_coeff: 0.009999999999999998
          kl: 0.010679640114823504
          policy_loss: -0.02335226595815685
          total_loss: -0.030554575162629286
          vf_explained_var: -0.12852191925048828
          vf_loss: 0.0008212573198963784
    num_agent_steps_sampled: 1155000
    num_agent_steps_trained: 1155000
    num_steps_sampled: 1155000
    num_steps_trained: 1155000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1155,28550.8,1155000,-0.1,0,-10,350.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1156000
  custom_metrics: {}
  date: 2021-10-09_06-20-39
  done: false
  episode_len_mean: 351.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3219
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8732191734843784
          entropy_coeff: 0.009999999999999998
          kl: 0.012357974476672594
          policy_loss: -0.0819186122984522
          total_loss: -0.0884906206590434
          vf_explained_var: -0.7742675542831421
          vf_loss: 0.0008862661766923136
    num_agent_steps_sampled: 1156000
    num_agent_steps_trained: 1156000
    num_steps_sampled: 1156000
    num_steps_trained: 1156000
  iterations_since_restore: 1156


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1156,28571.8,1156000,-0.1,0,-10,351.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1157000
  custom_metrics: {}
  date: 2021-10-09_06-21-03
  done: false
  episode_len_mean: 352.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3222
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8597302529546949
          entropy_coeff: 0.009999999999999998
          kl: 0.011473244730358316
          policy_loss: -0.06690614538060294
          total_loss: -0.07436906980971496
          vf_explained_var: -0.7887222170829773
          vf_loss: 0.0006675796253451456
    num_agent_steps_sampled: 1157000
    num_agent_steps_trained: 1157000
    num_steps_sampled: 1157000
    num_steps_trained: 1157000
  iterations_since_restore: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1157,28595.5,1157000,-0.1,0,-10,352.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1158000
  custom_metrics: {}
  date: 2021-10-09_06-21-24
  done: false
  episode_len_mean: 353.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3225
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8778036488427057
          entropy_coeff: 0.009999999999999998
          kl: 0.013757343537471734
          policy_loss: -0.05388219166133139
          total_loss: -0.05958153744124704
          vf_explained_var: -0.9640302658081055
          vf_loss: 0.0005281553150982492
    num_agent_steps_sampled: 1158000
    num_agent_steps_trained: 1158000
    num_steps_sampled: 1158000
    num_steps_trained: 1158000
  iterations_since_restore: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1158,28616.7,1158000,-0.1,0,-10,353.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1159000
  custom_metrics: {}
  date: 2021-10-09_06-21-45
  done: false
  episode_len_mean: 353.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3227
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8220338013437058
          entropy_coeff: 0.009999999999999998
          kl: 0.009228587538188717
          policy_loss: -0.05244337377241916
          total_loss: -0.06194832809269428
          vf_explained_var: -0.6982533931732178
          vf_loss: 0.0002963391200561697
    num_agent_steps_sampled: 1159000
    num_agent_steps_trained: 1159000
    num_steps_sampled: 1159000
    num_steps_trained: 1159000
  iterations_since_restore: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1159,28637.7,1159000,-0.1,0,-10,353.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1160000
  custom_metrics: {}
  date: 2021-10-09_06-22-08
  done: false
  episode_len_mean: 356.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3230
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8054139322704739
          entropy_coeff: 0.009999999999999998
          kl: 0.011858241742896623
          policy_loss: -0.06867103061328332
          total_loss: -0.07550539521293508
          vf_explained_var: -0.8288940191268921
          vf_loss: 0.00040175020727070253
    num_agent_steps_sampled: 1160000
    num_agent_steps_trained: 1160000
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1160,28660.4,1160000,-0.1,0,-10,356.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1161000
  custom_metrics: {}
  date: 2021-10-09_06-22-31
  done: false
  episode_len_mean: 358.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3233
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8847783101929558
          entropy_coeff: 0.009999999999999998
          kl: 0.011232638874436419
          policy_loss: -0.04297484726541572
          total_loss: -0.051220489003592065
          vf_explained_var: -0.9926095604896545
          vf_loss: 0.00035484093079705617
    num_agent_steps_sampled: 1161000
    num_agent_steps_trained: 1161000
    num_steps_sampled: 1161000
    num_steps_trained: 1161000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1161,28683.9,1161000,-0.1,0,-10,358.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1162000
  custom_metrics: {}
  date: 2021-10-09_06-22-55
  done: false
  episode_len_mean: 359.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3236
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8469753212398954
          entropy_coeff: 0.009999999999999998
          kl: 0.014671437942079353
          policy_loss: -0.06524226197765934
          total_loss: -0.06978371224055688
          vf_explained_var: -0.5376893281936646
          vf_loss: 0.0005438560728281219
    num_agent_steps_sampled: 1162000
    num_agent_steps_trained: 1162000
    num_steps_sampled: 1162000
    num_steps_trained: 1162000
  iterations_since_restore: 116

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1162,28707.9,1162000,-0.1,0,-10,359.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1163000
  custom_metrics: {}
  date: 2021-10-09_06-23-16
  done: false
  episode_len_mean: 360.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3238
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6795552531878153
          entropy_coeff: 0.009999999999999998
          kl: 0.010912700472919765
          policy_loss: -0.04635019159565369
          total_loss: -0.052703267687724695
          vf_explained_var: -0.8034349083900452
          vf_loss: 0.0004870492751554896
    num_agent_steps_sampled: 1163000
    num_agent_steps_trained: 1163000
    num_steps_sampled: 1163000
    num_steps_trained: 1163000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1163,28728.5,1163000,-0.1,0,-10,360.12




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1164000
  custom_metrics: {}
  date: 2021-10-09_06-23-56
  done: false
  episode_len_mean: 360.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3241
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8733251492182414
          entropy_coeff: 0.009999999999999998
          kl: 0.011528572086472167
          policy_loss: -0.08624775402454866
          total_loss: -0.09405533720014823
          vf_explained_var: -0.9125235676765442
          vf_loss: 0.00040839588263123815
    num_agent_steps_sampled: 1164000
    num_agent_steps_trained: 1164000
    num_steps_sampled: 1164000
    num_steps_trained: 1164000
  iterations_since_restore: 116

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1164,28769,1164000,-0.1,0,-10,360.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1165000
  custom_metrics: {}
  date: 2021-10-09_06-24-19
  done: false
  episode_len_mean: 361.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3243
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6352484491136339
          entropy_coeff: 0.009999999999999998
          kl: 0.01499693236435083
          policy_loss: -0.11235684591035049
          total_loss: -0.11456229558421506
          vf_explained_var: -0.4221090078353882
          vf_loss: 0.0004656482363417227
    num_agent_steps_sampled: 1165000
    num_agent_steps_trained: 1165000
    num_steps_sampled: 1165000
    num_steps_trained: 1165000
  iterations_since_restore: 1165

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1165,28791.3,1165000,-0.1,0,-10,361.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1166000
  custom_metrics: {}
  date: 2021-10-09_06-24-41
  done: false
  episode_len_mean: 364.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3246
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.86785489320755
          entropy_coeff: 0.009999999999999998
          kl: 0.012107348004238953
          policy_loss: -0.10301478753487268
          total_loss: -0.11021646629605028
          vf_explained_var: -0.4558138847351074
          vf_loss: 0.00043159363315983985
    num_agent_steps_sampled: 1166000
    num_agent_steps_trained: 1166000
    num_steps_sampled: 1166000
    num_steps_trained: 1166000
  iterations_since_restore: 1166

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1166,28813.4,1166000,-0.1,0,-10,364.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1167000
  custom_metrics: {}
  date: 2021-10-09_06-25-05
  done: false
  episode_len_mean: 364.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3249
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7662053333388434
          entropy_coeff: 0.009999999999999998
          kl: 0.011034281170947092
          policy_loss: -0.07472088630828592
          total_loss: -0.082070059577624
          vf_explained_var: -0.7080512046813965
          vf_loss: 0.00024653606378706173
    num_agent_steps_sampled: 1167000
    num_agent_steps_trained: 1167000
    num_steps_sampled: 1167000
    num_steps_trained: 1167000
  iterations_since_restore: 1167

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1167,28837.9,1167000,-0.1,0,-10,364.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1168000
  custom_metrics: {}
  date: 2021-10-09_06-25-29
  done: false
  episode_len_mean: 365.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3251
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8728127386834887
          entropy_coeff: 0.009999999999999998
          kl: 0.011647072176758064
          policy_loss: -0.07130519549051921
          total_loss: -0.07916845592359702
          vf_explained_var: -0.32800576090812683
          vf_loss: 0.00023948876187205315
    num_agent_steps_sampled: 1168000
    num_agent_steps_trained: 1168000
    num_steps_sampled: 1168000
    num_steps_trained: 1168000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1168,28861.4,1168000,-0.1,0,-10,365.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1169000
  custom_metrics: {}
  date: 2021-10-09_06-25-55
  done: false
  episode_len_mean: 365.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3254
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8349392506811353
          entropy_coeff: 0.009999999999999998
          kl: 0.012087375547606586
          policy_loss: -0.09937073273791207
          total_loss: -0.10652477029297087
          vf_explained_var: -0.7361752986907959
          vf_loss: 0.00016829590798200417
    num_agent_steps_sampled: 1169000
    num_agent_steps_trained: 1169000
    num_steps_sampled: 1169000
    num_steps_trained: 1169000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1169,28887.2,1169000,-0.1,0,-10,365.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1170000
  custom_metrics: {}
  date: 2021-10-09_06-26-21
  done: false
  episode_len_mean: 364.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3257
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8339337375428941
          entropy_coeff: 0.009999999999999998
          kl: 0.012567564099590643
          policy_loss: -0.057274269519580735
          total_loss: -0.06394791064990892
          vf_explained_var: -0.3395036458969116
          vf_loss: 0.00020057394435247665
    num_agent_steps_sampled: 1170000
    num_agent_steps_trained: 1170000
    num_steps_sampled: 1170000
    num_steps_trained: 1170000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1170,28913.4,1170000,-0.1,0,-10,364.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1171000
  custom_metrics: {}
  date: 2021-10-09_06-26-41
  done: false
  episode_len_mean: 365.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 3259
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.793259816699558
          entropy_coeff: 0.009999999999999998
          kl: 0.011031966519251786
          policy_loss: -0.041140831924147074
          total_loss: -0.04881433476176527
          vf_explained_var: -1.0
          vf_loss: 0.00019486523459717219
    num_agent_steps_sampled: 1171000
    num_agent_steps_trained: 1171000
    num_steps_sampled: 1171000
    num_steps_trained: 1171000
  iterations_since_restore: 1171
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1171,28933.3,1171000,-0.1,0,-10,365.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1172000
  custom_metrics: {}
  date: 2021-10-09_06-27-04
  done: false
  episode_len_mean: 368.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 3262
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8217402815818786
          entropy_coeff: 0.009999999999999998
          kl: 0.009882259847414627
          policy_loss: -0.03248015340003702
          total_loss: -0.041516440071993406
          vf_explained_var: -0.8227645754814148
          vf_loss: 0.00016573721238981104
    num_agent_steps_sampled: 1172000
    num_agent_steps_trained: 1172000
    num_steps_sampled: 1172000
    num_steps_trained: 1172000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1172,28956.1,1172000,-0.1,0,-10,368.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1173000
  custom_metrics: {}
  date: 2021-10-09_06-27-29
  done: false
  episode_len_mean: 366.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3265
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8485193676418727
          entropy_coeff: 0.009999999999999998
          kl: 0.010719801799383314
          policy_loss: -0.12404609434306621
          total_loss: -0.1325706789890925
          vf_explained_var: -0.9454359412193298
          vf_loss: 0.00018115988552583277
    num_agent_steps_sampled: 1173000
    num_agent_steps_trained: 1173000
    num_steps_sampled: 1173000
    num_steps_trained: 1173000
  iterations_since_restore: 1173
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1173,28981.7,1173000,0,0,0,366.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1174000
  custom_metrics: {}
  date: 2021-10-09_06-27-55
  done: false
  episode_len_mean: 365.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3268
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7612303296724956
          entropy_coeff: 0.009999999999999998
          kl: 0.010540446961573637
          policy_loss: -0.10340852588415146
          total_loss: -0.1112840029100577
          vf_explained_var: -0.16415050625801086
          vf_loss: 0.00012099679071171623
    num_agent_steps_sampled: 1174000
    num_agent_steps_trained: 1174000
    num_steps_sampled: 1174000
    num_steps_trained: 1174000
  iterations_since_restore: 1174
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1174,29007.7,1174000,0,0,0,365.3




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1175000
  custom_metrics: {}
  date: 2021-10-09_06-28-38
  done: false
  episode_len_mean: 365.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3271
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7314379387431675
          entropy_coeff: 0.009999999999999998
          kl: 0.012443712307794168
          policy_loss: -0.13846741954071654
          total_loss: -0.144335834764772
          vf_explained_var: -0.23228304088115692
          vf_loss: 9.382429218223681e-05
    num_agent_steps_sampled: 1175000
    num_agent_steps_trained: 1175000
    num_steps_sampled: 1175000
    num_steps_trained: 1175000
  iterations_since_restore: 1175
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1175,29050.6,1175000,0,0,0,365.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1176000
  custom_metrics: {}
  date: 2021-10-09_06-29-05
  done: false
  episode_len_mean: 364.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3274
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7449868824746875
          entropy_coeff: 0.009999999999999998
          kl: 0.010335999451362573
          policy_loss: -0.01181710875696606
          total_loss: -0.019768155821495586
          vf_explained_var: -0.01682046428322792
          vf_loss: 6.950824038843469e-05
    num_agent_steps_sampled: 1176000
    num_agent_steps_trained: 1176000
    num_steps_sampled: 1176000
    num_steps_trained: 1176000
  iterations_since_restore: 1176

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1176,29077.7,1176000,0,0,0,364.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1177000
  custom_metrics: {}
  date: 2021-10-09_06-29-30
  done: false
  episode_len_mean: 365.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3276
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.9403866569201151
          entropy_coeff: 0.009999999999999998
          kl: 0.011799942890111787
          policy_loss: -0.08232950224644608
          total_loss: -0.09086291107038656
          vf_explained_var: -0.8612138628959656
          vf_loss: 0.00010561932322161738
    num_agent_steps_sampled: 1177000
    num_agent_steps_trained: 1177000
    num_steps_sampled: 1177000
    num_steps_trained: 1177000
  iterations_since_restore: 1177


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1177,29102.4,1177000,0,0,0,365.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1178000
  custom_metrics: {}
  date: 2021-10-09_06-29-57
  done: false
  episode_len_mean: 366.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3279
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7026866171095105
          entropy_coeff: 0.009999999999999998
          kl: 0.012825669201963309
          policy_loss: -0.10842616069648
          total_loss: -0.11358118429780006
          vf_explained_var: -0.20303016901016235
          vf_loss: 0.00017125600045094164
    num_agent_steps_sampled: 1178000
    num_agent_steps_trained: 1178000
    num_steps_sampled: 1178000
    num_steps_trained: 1178000
  iterations_since_restore: 1178
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1178,29129.1,1178000,0,0,0,366.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1179000
  custom_metrics: {}
  date: 2021-10-09_06-30-21
  done: false
  episode_len_mean: 365.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3282
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.7464562098185221
          entropy_coeff: 0.009999999999999998
          kl: 0.011356586665140413
          policy_loss: -0.07718242241276635
          total_loss: -0.08414332115401825
          vf_explained_var: -0.5449227690696716
          vf_loss: 0.00014328959631610715
    num_agent_steps_sampled: 1179000
    num_agent_steps_trained: 1179000
    num_steps_sampled: 1179000
    num_steps_trained: 1179000
  iterations_since_restore: 1179


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1179,29153.2,1179000,0,0,0,365.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1180000
  custom_metrics: {}
  date: 2021-10-09_06-30-41
  done: false
  episode_len_mean: 368.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3284
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.387482112646103
          entropy_coeff: 0.009999999999999998
          kl: 0.008898137621421881
          policy_loss: -0.03884497594295277
          total_loss: -0.044506241815785566
          vf_explained_var: -0.4116908609867096
          vf_loss: 9.597127747029946e-05
    num_agent_steps_sampled: 1180000
    num_agent_steps_trained: 1180000
    num_steps_sampled: 1180000
    num_steps_trained: 1180000
  iterations_since_restore: 1180
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1180,29173.3,1180000,0,0,0,368.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1181000
  custom_metrics: {}
  date: 2021-10-09_06-31-04
  done: false
  episode_len_mean: 371.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3286
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.6192446655697292
          entropy_coeff: 0.009999999999999998
          kl: 0.020185407864443956
          policy_loss: -0.010185230606132084
          total_loss: -0.0023522120383050707
          vf_explained_var: -0.4596159756183624
          vf_loss: 0.005610745562039989
    num_agent_steps_sampled: 1181000
    num_agent_steps_trained: 1181000
    num_steps_sampled: 1181000
    num_steps_trained: 1181000
  iterations_since_restore: 1181

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1181,29196.3,1181000,0,0,0,371.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1182000
  custom_metrics: {}
  date: 2021-10-09_06-31-30
  done: false
  episode_len_mean: 372.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3289
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.8639484193589952
          entropy_coeff: 0.009999999999999998
          kl: 0.007705700371388838
          policy_loss: -0.11062880953152975
          total_loss: -0.11867007679409451
          vf_explained_var: -0.5030547976493835
          vf_loss: 5.359299935258201e-05
    num_agent_steps_sampled: 1182000
    num_agent_steps_trained: 1182000
    num_steps_sampled: 1182000
    num_steps_trained: 1182000
  iterations_since_restore: 1182
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1182,29222.9,1182000,0,0,0,372.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1183000
  custom_metrics: {}
  date: 2021-10-09_06-31-56
  done: false
  episode_len_mean: 372.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3292
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.9349634475178188
          entropy_coeff: 0.009999999999999998
          kl: 0.007918576994783747
          policy_loss: -0.0019591086647576758
          total_loss: -0.010412777794731988
          vf_explained_var: -0.9679680466651917
          vf_loss: 6.00384628139889e-05
    num_agent_steps_sampled: 1183000
    num_agent_steps_trained: 1183000
    num_steps_sampled: 1183000
    num_steps_trained: 1183000
  iterations_since_restore: 1183

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1183,29248.5,1183000,0,0,0,372.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1184000
  custom_metrics: {}
  date: 2021-10-09_06-32-21
  done: false
  episode_len_mean: 373.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3294
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7550510062111748
          entropy_coeff: 0.009999999999999998
          kl: 0.011044404343008414
          policy_loss: -0.13979624381495848
          total_loss: -0.14215561805499924
          vf_explained_var: -0.7205946445465088
          vf_loss: 7.77695662084928e-05
    num_agent_steps_sampled: 1184000
    num_agent_steps_trained: 1184000
    num_steps_sampled: 1184000
    num_steps_trained: 1184000
  iterations_since_restore: 1184
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1184,29273.4,1184000,0,0,0,373.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1185000
  custom_metrics: {}
  date: 2021-10-09_06-32-47
  done: false
  episode_len_mean: 372.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3297
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.9408714360660977
          entropy_coeff: 0.009999999999999998
          kl: 0.008944718318583966
          policy_loss: -0.11977981956054767
          total_loss: -0.12688348932812613
          vf_explained_var: -0.8031095862388611
          vf_loss: 6.493028611455681e-05
    num_agent_steps_sampled: 1185000
    num_agent_steps_trained: 1185000
    num_steps_sampled: 1185000
    num_steps_trained: 1185000
  iterations_since_restore: 1185
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1185,29299.6,1185000,0,0,0,372.75




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1186000
  custom_metrics: {}
  date: 2021-10-09_06-33-29
  done: false
  episode_len_mean: 373.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3300
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.840404130352868
          entropy_coeff: 0.009999999999999998
          kl: 0.00946701113803267
          policy_loss: -0.10203336088193787
          total_loss: -0.10739386149992546
          vf_explained_var: -0.9156218767166138
          vf_loss: 8.87054192490824e-05
    num_agent_steps_sampled: 1186000
    num_agent_steps_trained: 1186000
    num_steps_sampled: 1186000
    num_steps_trained: 1186000
  iterations_since_restore: 1186
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1186,29341.7,1186000,0,0,0,373.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1187000
  custom_metrics: {}
  date: 2021-10-09_06-33-55
  done: false
  episode_len_mean: 372.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3303
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.6595379299587674
          entropy_coeff: 0.009999999999999998
          kl: 0.010091501008872841
          policy_loss: -0.12864009038441712
          total_loss: -0.1313586663454771
          vf_explained_var: -0.9040572047233582
          vf_loss: 6.740490114478032e-05
    num_agent_steps_sampled: 1187000
    num_agent_steps_trained: 1187000
    num_steps_sampled: 1187000
    num_steps_trained: 1187000
  iterations_since_restore: 1187
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1187,29367.4,1187000,0,0,0,372.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1188000
  custom_metrics: {}
  date: 2021-10-09_06-34-16
  done: false
  episode_len_mean: 373.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3306
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7347865263621012
          entropy_coeff: 0.009999999999999998
          kl: 0.009487026422685889
          policy_loss: -0.08405386844856871
          total_loss: -0.08831064123660326
          vf_explained_var: -0.9609946608543396
          vf_loss: 0.00010887197471068552
    num_agent_steps_sampled: 1188000
    num_agent_steps_trained: 1188000
    num_steps_sampled: 1188000
    num_steps_trained: 1188000
  iterations_since_restore: 1188


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1188,29388.6,1188000,0,0,0,373.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1189000
  custom_metrics: {}
  date: 2021-10-09_06-34-39
  done: false
  episode_len_mean: 371.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3308
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7536004424095153
          entropy_coeff: 0.009999999999999998
          kl: 0.01410608559580673
          policy_loss: -0.08749343049195078
          total_loss: -0.08559507810407214
          vf_explained_var: 0.09004804491996765
          vf_loss: 0.0001313267932144097
    num_agent_steps_sampled: 1189000
    num_agent_steps_trained: 1189000
    num_steps_sampled: 1189000
    num_steps_trained: 1189000
  iterations_since_restore: 1189
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1189,29411.5,1189000,0,0,0,371.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1190000
  custom_metrics: {}
  date: 2021-10-09_06-35-03
  done: false
  episode_len_mean: 371.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3311
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7914612266752454
          entropy_coeff: 0.009999999999999998
          kl: 0.006639130677433118
          policy_loss: 0.0018232657263676326
          total_loss: -0.0069286844382683435
          vf_explained_var: -0.7163047790527344
          vf_loss: 7.75535592361444e-05
    num_agent_steps_sampled: 1190000
    num_agent_steps_trained: 1190000
    num_steps_sampled: 1190000
    num_steps_trained: 1190000
  iterations_since_restore: 1190

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1190,29434.8,1190000,0,0,0,371.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1191000
  custom_metrics: {}
  date: 2021-10-09_06-35-24
  done: false
  episode_len_mean: 372.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3313
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7705613017082213
          entropy_coeff: 0.009999999999999998
          kl: 0.007834522538578595
          policy_loss: -0.06977571381462945
          total_loss: -0.07668985433669553
          vf_explained_var: -0.6580043435096741
          vf_loss: 7.056445133962875e-05
    num_agent_steps_sampled: 1191000
    num_agent_steps_trained: 1191000
    num_steps_sampled: 1191000
    num_steps_trained: 1191000
  iterations_since_restore: 1191
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1191,29455.7,1191000,0,0,0,372.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1192000
  custom_metrics: {}
  date: 2021-10-09_06-35-48
  done: false
  episode_len_mean: 372.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3316
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7720109661420187
          entropy_coeff: 0.009999999999999998
          kl: 0.008816519643345675
          policy_loss: -0.07216837242659595
          total_loss: -0.07776384761350022
          vf_explained_var: -1.0
          vf_loss: 5.994713119434891e-05
    num_agent_steps_sampled: 1192000
    num_agent_steps_trained: 1192000
    num_steps_sampled: 1192000
    num_steps_trained: 1192000
  iterations_since_restore: 1192
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1192,29480.1,1192000,0,0,0,372.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1193000
  custom_metrics: {}
  date: 2021-10-09_06-36-13
  done: false
  episode_len_mean: 371.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3319
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.81568443775177
          entropy_coeff: 0.009999999999999998
          kl: 0.009755993276219104
          policy_loss: -0.03327116813096735
          total_loss: -0.038024865960081415
          vf_explained_var: -0.37423503398895264
          vf_loss: 5.2864866180849886e-05
    num_agent_steps_sampled: 1193000
    num_agent_steps_trained: 1193000
    num_steps_sampled: 1193000
    num_steps_trained: 1193000
  iterations_since_restore: 1193


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1193,29505.1,1193000,0,0,0,371.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1194000
  custom_metrics: {}
  date: 2021-10-09_06-36-39
  done: false
  episode_len_mean: 369.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3322
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7814874490102133
          entropy_coeff: 0.009999999999999998
          kl: 0.010859839840446976
          policy_loss: -0.03370714613960849
          total_loss: -0.0365981401461694
          vf_explained_var: -0.7225144505500793
          vf_loss: 6.307530723764406e-05
    num_agent_steps_sampled: 1194000
    num_agent_steps_trained: 1194000
    num_steps_sampled: 1194000
    num_steps_trained: 1194000
  iterations_since_restore: 1194
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1194,29531.1,1194000,0,0,0,369.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1195000
  custom_metrics: {}
  date: 2021-10-09_06-37-06
  done: false
  episode_len_mean: 366.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 3326
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7130860580338372
          entropy_coeff: 0.009999999999999998
          kl: 0.008804809869912628
          policy_loss: -0.003631422130597962
          total_loss: -0.008667784598138597
          vf_explained_var: -0.7138931155204773
          vf_loss: 4.583191556674946e-05
    num_agent_steps_sampled: 1195000
    num_agent_steps_trained: 1195000
    num_steps_sampled: 1195000
    num_steps_trained: 1195000
  iterations_since_restore: 1195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1195,29558,1195000,0,0,0,366.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1196000
  custom_metrics: {}
  date: 2021-10-09_06-37-29
  done: false
  episode_len_mean: 365.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3328
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.9490975737571716
          entropy_coeff: 0.009999999999999998
          kl: 0.00960770304404025
          policy_loss: -0.08773325160145759
          total_loss: -0.09401787825756602
          vf_explained_var: -0.9851754903793335
          vf_loss: 5.899116206112214e-05
    num_agent_steps_sampled: 1196000
    num_agent_steps_trained: 1196000
    num_steps_sampled: 1196000
    num_steps_trained: 1196000
  iterations_since_restore: 1196
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1196,29580.8,1196000,0,0,0,365.76




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1197000
  custom_metrics: {}
  date: 2021-10-09_06-38-12
  done: false
  episode_len_mean: 365.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3331
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.9115858581331042
          entropy_coeff: 0.009999999999999998
          kl: 0.010528772684926047
          policy_loss: -0.12181478432483143
          total_loss: -0.12646907389991813
          vf_explained_var: -0.8703523278236389
          vf_loss: 5.3803015463118856e-05
    num_agent_steps_sampled: 1197000
    num_agent_steps_trained: 1197000
    num_steps_sampled: 1197000
    num_steps_trained: 1197000
  iterations_since_restore: 1197


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1197,29623.9,1197000,0,0,0,365.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1198000
  custom_metrics: {}
  date: 2021-10-09_06-38-36
  done: false
  episode_len_mean: 364.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3334
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.8448934939172532
          entropy_coeff: 0.009999999999999998
          kl: 0.00933437898668656
          policy_loss: -0.12489234939631488
          total_loss: -0.13052829907586178
          vf_explained_var: -0.769837498664856
          vf_loss: 3.9648318306717556e-05
    num_agent_steps_sampled: 1198000
    num_agent_steps_trained: 1198000
    num_steps_sampled: 1198000
    num_steps_trained: 1198000
  iterations_since_restore: 1198
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1198,29648.1,1198000,0,0,0,364.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1199000
  custom_metrics: {}
  date: 2021-10-09_06-38-57
  done: false
  episode_len_mean: 365.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3337
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.5989855686823526
          entropy_coeff: 0.009999999999999998
          kl: 0.01048415074311317
          policy_loss: -0.12556285311778387
          total_loss: -0.1271277898715602
          vf_explained_var: -0.5162394046783447
          vf_loss: 7.821461557695228e-05
    num_agent_steps_sampled: 1199000
    num_agent_steps_trained: 1199000
    num_steps_sampled: 1199000
    num_steps_trained: 1199000
  iterations_since_restore: 1199
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1199,29669.1,1199000,0,0,0,365.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1200000
  custom_metrics: {}
  date: 2021-10-09_06-39-18
  done: false
  episode_len_mean: 365.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3339
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.9449335707558526
          entropy_coeff: 0.009999999999999998
          kl: 0.010355236900304318
          policy_loss: -0.05333883170452383
          total_loss: -0.05856724853316943
          vf_explained_var: -0.6924934983253479
          vf_loss: 5.061992154272351e-05
    num_agent_steps_sampled: 1200000
    num_agent_steps_trained: 1200000
    num_steps_sampled: 1200000
    num_steps_trained: 1200000
  iterations_since_restore: 1200
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1200,29690.3,1200000,0,0,0,365.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1201000
  custom_metrics: {}
  date: 2021-10-09_06-39-40
  done: false
  episode_len_mean: 364.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3342
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.8115230215920342
          entropy_coeff: 0.009999999999999998
          kl: 0.010395651123813741
          policy_loss: -0.060253340378403665
          total_loss: -0.06408028677105904
          vf_explained_var: -0.5577942132949829
          vf_loss: 6.268086738903851e-05
    num_agent_steps_sampled: 1201000
    num_agent_steps_trained: 1201000
    num_steps_sampled: 1201000
    num_steps_trained: 1201000
  iterations_since_restore: 1201


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1201,29712.3,1201000,0,0,0,364.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1202000
  custom_metrics: {}
  date: 2021-10-09_06-40-05
  done: false
  episode_len_mean: 363.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3345
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7391663935449388
          entropy_coeff: 0.009999999999999998
          kl: 0.008759940438681153
          policy_loss: -0.08815347914480501
          total_loss: -0.09352586453573572
          vf_explained_var: -0.5192093253135681
          vf_loss: 3.201245522682762e-05
    num_agent_steps_sampled: 1202000
    num_agent_steps_trained: 1202000
    num_steps_sampled: 1202000
    num_steps_trained: 1202000
  iterations_since_restore: 1202
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1202,29736.5,1202000,0,0,0,363.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1203000
  custom_metrics: {}
  date: 2021-10-09_06-40-28
  done: false
  episode_len_mean: 364.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3348
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.7685994214481777
          entropy_coeff: 0.009999999999999998
          kl: 0.009869834955044964
          policy_loss: -0.04454308546458682
          total_loss: -0.04867008501249883
          vf_explained_var: -0.5961704850196838
          vf_loss: 5.292865799775528e-05
    num_agent_steps_sampled: 1203000
    num_agent_steps_trained: 1203000
    num_steps_sampled: 1203000
    num_steps_trained: 1203000
  iterations_since_restore: 1203
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1203,29759.5,1203000,0,0,0,364.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1204000
  custom_metrics: {}
  date: 2021-10-09_06-40-53
  done: false
  episode_len_mean: 362.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3351
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.232191440794203
          entropy_coeff: 0.009999999999999998
          kl: 0.004586831526063248
          policy_loss: -0.07129703048202726
          total_loss: -0.07727911215689447
          vf_explained_var: 0.03544517233967781
          vf_loss: 6.312839278608509e-05
    num_agent_steps_sampled: 1204000
    num_agent_steps_trained: 1204000
    num_steps_sampled: 1204000
    num_steps_trained: 1204000
  iterations_since_restore: 1204
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1204,29785.2,1204000,0,0,0,362.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1205000
  custom_metrics: {}
  date: 2021-10-09_06-41-18
  done: false
  episode_len_mean: 361.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3354
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.777876573138767
          entropy_coeff: 0.009999999999999998
          kl: 0.014096354530914063
          policy_loss: -0.09089391846209764
          total_loss: -0.0989975685874621
          vf_explained_var: -0.5622497797012329
          vf_loss: 3.025580018503307e-05
    num_agent_steps_sampled: 1205000
    num_agent_steps_trained: 1205000
    num_steps_sampled: 1205000
    num_steps_trained: 1205000
  iterations_since_restore: 1205
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1205,29809.6,1205000,0,0,0,361.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1206000
  custom_metrics: {}
  date: 2021-10-09_06-41-43
  done: false
  episode_len_mean: 362.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3357
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.661946181456248
          entropy_coeff: 0.009999999999999998
          kl: 0.013029376266953236
          policy_loss: -0.09094004943552944
          total_loss: -0.09859438890384303
          vf_explained_var: -0.6232097148895264
          vf_loss: 5.0304341958609356e-05
    num_agent_steps_sampled: 1206000
    num_agent_steps_trained: 1206000
    num_steps_sampled: 1206000
    num_steps_trained: 1206000
  iterations_since_restore: 1206
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1206,29835.1,1206000,0,0,0,362.12




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1207000
  custom_metrics: {}
  date: 2021-10-09_06-42-25
  done: false
  episode_len_mean: 358.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3360
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.5705040123727587
          entropy_coeff: 0.009999999999999998
          kl: 0.023541648844052552
          policy_loss: -0.030200807750225066
          total_loss: -0.029743910332520804
          vf_explained_var: 0.18238092958927155
          vf_loss: 5.452111042056155e-05
    num_agent_steps_sampled: 1207000
    num_agent_steps_trained: 1207000
    num_steps_sampled: 1207000
    num_steps_trained: 1207000
  iterations_since_restore: 1207

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1207,29876.8,1207000,0,0,0,358.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1208000
  custom_metrics: {}
  date: 2021-10-09_06-42-52
  done: false
  episode_len_mean: 356.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3363
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.6624542951583863
          entropy_coeff: 0.009999999999999998
          kl: 0.011566564053246017
          policy_loss: -0.08784432262182236
          total_loss: -0.09255819407602152
          vf_explained_var: -0.9485533833503723
          vf_loss: 3.9748736768766926e-05
    num_agent_steps_sampled: 1208000
    num_agent_steps_trained: 1208000
    num_steps_sampled: 1208000
    num_steps_trained: 1208000
  iterations_since_restore: 1208


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1208,29904.1,1208000,0,0,0,356.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1209000
  custom_metrics: {}
  date: 2021-10-09_06-43-14
  done: false
  episode_len_mean: 357.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3365
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.8021737972895304
          entropy_coeff: 0.009999999999999998
          kl: 0.00991964506668397
          policy_loss: -0.12010629403715332
          total_loss: -0.12791832480579615
          vf_explained_var: -0.5422441959381104
          vf_loss: 2.9038074828551747e-05
    num_agent_steps_sampled: 1209000
    num_agent_steps_trained: 1209000
    num_steps_sampled: 1209000
    num_steps_trained: 1209000
  iterations_since_restore: 1209
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1209,29926.3,1209000,0,0,0,357.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1210000
  custom_metrics: {}
  date: 2021-10-09_06-43-37
  done: false
  episode_len_mean: 359.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3368
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.8073955085542468
          entropy_coeff: 0.009999999999999998
          kl: 0.010502976191071748
          policy_loss: -0.0765000212730633
          total_loss: -0.08375436006527807
          vf_explained_var: -0.9191492795944214
          vf_loss: 4.0267576312342296e-05
    num_agent_steps_sampled: 1210000
    num_agent_steps_trained: 1210000
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
  iterations_since_restore: 1210
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1210,29949.1,1210000,0,0,0,359.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1211000
  custom_metrics: {}
  date: 2021-10-09_06-44-01
  done: false
  episode_len_mean: 359.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3371
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.66479434967041
          entropy_coeff: 0.009999999999999998
          kl: 0.012478009569255575
          policy_loss: -0.06467512022289965
          total_loss: -0.0684817023989227
          vf_explained_var: -0.8826696872711182
          vf_loss: 3.500689485412699e-05
    num_agent_steps_sampled: 1211000
    num_agent_steps_trained: 1211000
    num_steps_sampled: 1211000
    num_steps_trained: 1211000
  iterations_since_restore: 1211
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1211,29972.6,1211000,0,0,0,359.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1212000
  custom_metrics: {}
  date: 2021-10-09_06-44-29
  done: false
  episode_len_mean: 358.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3374
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.5147760450839995
          entropy_coeff: 0.009999999999999998
          kl: 0.049470363397261095
          policy_loss: 0.04982306369476848
          total_loss: 0.08577601135604912
          vf_explained_var: 0.2624566853046417
          vf_loss: 0.000328595870248844
    num_agent_steps_sampled: 1212000
    num_agent_steps_trained: 1212000
    num_steps_sampled: 1212000
    num_steps_trained: 1212000
  iterations_since_restore: 1212
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1212,30000.9,1212000,0,0,0,358.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1213000
  custom_metrics: {}
  date: 2021-10-09_06-44-52
  done: false
  episode_len_mean: 357.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3377
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5394707083702084
          cur_lr: 5.000000000000001e-05
          entropy: 1.6936431739065383
          entropy_coeff: 0.009999999999999998
          kl: 0.006241569658788235
          policy_loss: -0.05332186956786447
          total_loss: -0.06060868778990375
          vf_explained_var: -0.9706218242645264
          vf_loss: 4.0899017727093044e-05
    num_agent_steps_sampled: 1213000
    num_agent_steps_trained: 1213000
    num_steps_sampled: 1213000
    num_steps_trained: 1213000
  iterations_since_restore: 1213


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1213,30023.6,1213000,0,0,0,357.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1214000
  custom_metrics: {}
  date: 2021-10-09_06-45-15
  done: false
  episode_len_mean: 358.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3380
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5394707083702084
          cur_lr: 5.000000000000001e-05
          entropy: 1.7519084109200371
          entropy_coeff: 0.009999999999999998
          kl: 0.009397790682073647
          policy_loss: -0.030376771775384743
          total_loss: -0.03337340580506457
          vf_explained_var: -0.2744090259075165
          vf_loss: 5.483061363015117e-05
    num_agent_steps_sampled: 1214000
    num_agent_steps_trained: 1214000
    num_steps_sampled: 1214000
    num_steps_trained: 1214000
  iterations_since_restore: 1214


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1214,30046.7,1214000,0,0,0,358.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1215000
  custom_metrics: {}
  date: 2021-10-09_06-45-41
  done: false
  episode_len_mean: 356.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3383
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5394707083702084
          cur_lr: 5.000000000000001e-05
          entropy: 1.582018984026379
          entropy_coeff: 0.009999999999999998
          kl: 0.027743056298050482
          policy_loss: 0.017696772515773774
          total_loss: 0.04489588439464569
          vf_explained_var: 0.0019340674625709653
          vf_loss: 0.0003096779667430383
    num_agent_steps_sampled: 1215000
    num_agent_steps_trained: 1215000
    num_steps_sampled: 1215000
    num_steps_trained: 1215000
  iterations_since_restore: 1215
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1215,30072.7,1215000,0,0,0,356.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1216000
  custom_metrics: {}
  date: 2021-10-09_06-46-05
  done: false
  episode_len_mean: 352.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3386
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.691508366001977
          entropy_coeff: 0.009999999999999998
          kl: 0.006133248541428671
          policy_loss: -0.07066950084020694
          total_loss: -0.07334841175211801
          vf_explained_var: -0.2122529298067093
          vf_loss: 7.324004008195415e-05
    num_agent_steps_sampled: 1216000
    num_agent_steps_trained: 1216000
    num_steps_sampled: 1216000
    num_steps_trained: 1216000
  iterations_since_restore: 1216
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1216,30096.2,1216000,0,0,0,352.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1217000
  custom_metrics: {}
  date: 2021-10-09_06-46-31
  done: false
  episode_len_mean: 351.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3389
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.345361598332723
          entropy_coeff: 0.009999999999999998
          kl: 0.005728907923862556
          policy_loss: -0.03539831808043851
          total_loss: -0.03559348736372259
          vf_explained_var: -0.004835339263081551
          vf_loss: 2.9217809626263464e-05
    num_agent_steps_sampled: 1217000
    num_agent_steps_trained: 1217000
    num_steps_sampled: 1217000
    num_steps_trained: 1217000
  iterations_since_restore: 1217


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1217,30122.2,1217000,0,0,0,351.46




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1218000
  custom_metrics: {}
  date: 2021-10-09_06-47-11
  done: false
  episode_len_mean: 351.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3391
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.852451405260298
          entropy_coeff: 0.009999999999999998
          kl: 0.005135809277765456
          policy_loss: -0.048943574954238206
          total_loss: -0.05559334612141053
          vf_explained_var: -0.9584792256355286
          vf_loss: 1.5100138064857067e-05
    num_agent_steps_sampled: 1218000
    num_agent_steps_trained: 1218000
    num_steps_sampled: 1218000
    num_steps_trained: 1218000
  iterations_since_restore: 1218
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1218,30162.9,1218000,0,0,0,351.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1219000
  custom_metrics: {}
  date: 2021-10-09_06-47-37
  done: false
  episode_len_mean: 349.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 3395
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.8015023046069676
          entropy_coeff: 0.009999999999999998
          kl: 0.005285766275202993
          policy_loss: -0.07153798213435544
          total_loss: -0.077320259436965
          vf_explained_var: -0.36157816648483276
          vf_loss: 2.6822546806215543e-05
    num_agent_steps_sampled: 1219000
    num_agent_steps_trained: 1219000
    num_steps_sampled: 1219000
    num_steps_trained: 1219000
  iterations_since_restore: 1219
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1219,30188.2,1219000,0,0,0,349.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1220000
  custom_metrics: {}
  date: 2021-10-09_06-48-02
  done: false
  episode_len_mean: 349.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3398
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.6242494609620837
          entropy_coeff: 0.009999999999999998
          kl: 0.006381410024271024
          policy_loss: -0.07313959310866064
          total_loss: -0.07453838862064811
          vf_explained_var: -0.5375955104827881
          vf_loss: 0.00010770895294424716
    num_agent_steps_sampled: 1220000
    num_agent_steps_trained: 1220000
    num_steps_sampled: 1220000
    num_steps_trained: 1220000
  iterations_since_restore: 1220
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1220,30213.5,1220000,0,0,0,349.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1221000
  custom_metrics: {}
  date: 2021-10-09_06-48-25
  done: false
  episode_len_mean: 349.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3400
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.6993925624423556
          entropy_coeff: 0.009999999999999998
          kl: 0.00621030851360115
          policy_loss: -0.07368054816292392
          total_loss: -0.07629964753157563
          vf_explained_var: -0.7203401923179626
          vf_loss: 3.3943405873691925e-05
    num_agent_steps_sampled: 1221000
    num_agent_steps_trained: 1221000
    num_steps_sampled: 1221000
    num_steps_trained: 1221000
  iterations_since_restore: 1221
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1221,30236.9,1221000,0,0,0,349.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1222000
  custom_metrics: {}
  date: 2021-10-09_06-48-52
  done: false
  episode_len_mean: 348.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3403
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.615514498286777
          entropy_coeff: 0.009999999999999998
          kl: 0.005301594923773184
          policy_loss: -0.03503986339395245
          total_loss: -0.03893297780305147
          vf_explained_var: -0.6042172908782959
          vf_loss: 1.9554805152842567e-05
    num_agent_steps_sampled: 1222000
    num_agent_steps_trained: 1222000
    num_steps_sampled: 1222000
    num_steps_trained: 1222000
  iterations_since_restore: 1222
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1222,30263.3,1222000,0,0,0,348.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1223000
  custom_metrics: {}
  date: 2021-10-09_06-49-15
  done: false
  episode_len_mean: 347.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3406
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.6787084168857997
          entropy_coeff: 0.009999999999999998
          kl: 0.005691410033377172
          policy_loss: -0.08741024740868145
          total_loss: -0.09103739547232786
          vf_explained_var: -0.04697832465171814
          vf_loss: 1.729827701840097e-05
    num_agent_steps_sampled: 1223000
    num_agent_steps_trained: 1223000
    num_steps_sampled: 1223000
    num_steps_trained: 1223000
  iterations_since_restore: 1223
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1223,30286.9,1223000,0,0,0,347.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1224000
  custom_metrics: {}
  date: 2021-10-09_06-49-38
  done: false
  episode_len_mean: 347.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3409
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.7470491952366298
          entropy_coeff: 0.009999999999999998
          kl: 0.005301151113641338
          policy_loss: 0.005636059254821804
          total_loss: 0.0004311373250352012
          vf_explained_var: -0.9310541152954102
          vf_loss: 2.411747362000622e-05
    num_agent_steps_sampled: 1224000
    num_agent_steps_trained: 1224000
    num_steps_sampled: 1224000
    num_steps_trained: 1224000
  iterations_since_restore: 1224
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1224,30309.6,1224000,0,0,0,347.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1225000
  custom_metrics: {}
  date: 2021-10-09_06-50-02
  done: false
  episode_len_mean: 347.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3412
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.576426981555091
          entropy_coeff: 0.009999999999999998
          kl: 0.005977939392387426
          policy_loss: -0.06567911739564604
          total_loss: -0.06761621659000715
          vf_explained_var: -0.6730944514274597
          vf_loss: 2.2878071109112675e-05
    num_agent_steps_sampled: 1225000
    num_agent_steps_trained: 1225000
    num_steps_sampled: 1225000
    num_steps_trained: 1225000
  iterations_since_restore: 1225
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1225,30333.5,1225000,0,0,0,347.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1226000
  custom_metrics: {}
  date: 2021-10-09_06-50-25
  done: false
  episode_len_mean: 347.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 3414
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.4809842109680176
          entropy_coeff: 0.009999999999999998
          kl: 0.007220452833720318
          policy_loss: 0.03638086915016174
          total_loss: 0.2095761179924011
          vf_explained_var: -0.44711926579475403
          vf_loss: 0.17133157431510174
    num_agent_steps_sampled: 1226000
    num_agent_steps_trained: 1226000
    num_steps_sampled: 1226000
    num_steps_trained: 1226000
  iterations_since_restore: 1226
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1226,30356.5,1226000,-0.12,0,-12,347.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1227000
  custom_metrics: {}
  date: 2021-10-09_06-50-48
  done: false
  episode_len_mean: 346.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3417
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.7848088145256042
          entropy_coeff: 0.009999999999999998
          kl: 0.006067222536035185
          policy_loss: -0.13459541619651846
          total_loss: -0.1297732409917646
          vf_explained_var: -0.736051082611084
          vf_loss: 0.008659793788360225
    num_agent_steps_sampled: 1227000
    num_agent_steps_trained: 1227000
    num_steps_sampled: 1227000
    num_steps_trained: 1227000
  iterations_since_restore: 1227
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1227,30379.8,1227000,-0.12,0,-12,346.48




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1228000
  custom_metrics: {}
  date: 2021-10-09_06-51-28
  done: false
  episode_len_mean: 348.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3420
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.6766092485851711
          entropy_coeff: 0.009999999999999998
          kl: 0.007198212405310065
          policy_loss: -0.0943150680926111
          total_loss: -0.07934205929438273
          vf_explained_var: -0.13570274412631989
          vf_loss: 0.015116944003643261
    num_agent_steps_sampled: 1228000
    num_agent_steps_trained: 1228000
    num_steps_sampled: 1228000
    num_steps_trained: 1228000
  iterations_since_restore: 1228

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1228,30419.7,1228000,-0.12,0,-12,348.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1229000
  custom_metrics: {}
  date: 2021-10-09_06-51-51
  done: false
  episode_len_mean: 349.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3423
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.6415839738316007
          entropy_coeff: 0.009999999999999998
          kl: 0.006537525411144548
          policy_loss: -0.10450234243439303
          total_loss: -0.09790260067416562
          vf_explained_var: -0.5241643190383911
          vf_loss: 0.007919092691089544
    num_agent_steps_sampled: 1229000
    num_agent_steps_trained: 1229000
    num_steps_sampled: 1229000
    num_steps_trained: 1229000
  iterations_since_restore: 1229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1229,30442.8,1229000,-0.12,0,-12,349.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1230000
  custom_metrics: {}
  date: 2021-10-09_06-52-17
  done: false
  episode_len_mean: 350.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3426
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.6268302308188545
          entropy_coeff: 0.009999999999999998
          kl: 0.005854658226044801
          policy_loss: -0.07668238576087687
          total_loss: -0.07641882037536965
          vf_explained_var: -0.7834868431091309
          vf_loss: 0.0030122534294302265
    num_agent_steps_sampled: 1230000
    num_agent_steps_trained: 1230000
    num_steps_sampled: 1230000
    num_steps_trained: 1230000
  iterations_since_restore: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1230,30467.9,1230000,-0.12,0,-12,350.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1231000
  custom_metrics: {}
  date: 2021-10-09_06-52-42
  done: false
  episode_len_mean: 348.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3429
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.309206062555313
          cur_lr: 5.000000000000001e-05
          entropy: 1.707976934644911
          entropy_coeff: 0.009999999999999998
          kl: 0.004281847517273294
          policy_loss: -0.0912805136707094
          total_loss: -0.03649419380558862
          vf_explained_var: -0.4433310925960541
          vf_loss: 0.061978426319546996
    num_agent_steps_sampled: 1231000
    num_agent_steps_trained: 1231000
    num_steps_sampled: 1231000
    num_steps_trained: 1231000
  iterations_since_restore: 1231
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1231,30493.6,1231000,-0.16,0,-12,348.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1232000
  custom_metrics: {}
  date: 2021-10-09_06-53-05
  done: false
  episode_len_mean: 349.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3432
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6864670395851136
          entropy_coeff: 0.009999999999999998
          kl: 0.00784696569938924
          policy_loss: -0.0555944943593608
          total_loss: -0.05714475926425722
          vf_explained_var: -0.25790321826934814
          vf_loss: 0.006254276077056096
    num_agent_steps_sampled: 1232000
    num_agent_steps_trained: 1232000
    num_steps_sampled: 1232000
    num_steps_trained: 1232000
  iterations_since_restore: 1232

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1232,30516.2,1232000,-0.16,0,-12,349.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1233000
  custom_metrics: {}
  date: 2021-10-09_06-53-27
  done: false
  episode_len_mean: 350.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 3434
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8390397363238864
          entropy_coeff: 0.009999999999999998
          kl: 0.007704796530479285
          policy_loss: -0.04587770802900195
          total_loss: -0.053701629986365636
          vf_explained_var: -0.6287428736686707
          vf_loss: 0.0016704939111756782
    num_agent_steps_sampled: 1233000
    num_agent_steps_trained: 1233000
    num_steps_sampled: 1233000
    num_steps_trained: 1233000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1233,30538.3,1233000,-0.16,0,-12,350.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1234000
  custom_metrics: {}
  date: 2021-10-09_06-53-49
  done: false
  episode_len_mean: 350.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3437
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6198297646310595
          entropy_coeff: 0.009999999999999998
          kl: 0.0073607792743938276
          policy_loss: -0.12807276958806646
          total_loss: -0.1302104855577151
          vf_explained_var: -0.4731793999671936
          vf_loss: 0.005561803778012593
    num_agent_steps_sampled: 1234000
    num_agent_steps_trained: 1234000
    num_steps_sampled: 1234000
    num_steps_trained: 1234000
  iterations_since_restore: 1234

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1234,30560.6,1234000,-0.16,0,-12,350


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1235000
  custom_metrics: {}
  date: 2021-10-09_06-54-11
  done: false
  episode_len_mean: 349.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 3439
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7848476039038763
          entropy_coeff: 0.009999999999999998
          kl: 0.006999541494193235
          policy_loss: -0.028796299195124043
          total_loss: -0.02580183976226383
          vf_explained_var: -0.5294093489646912
          vf_loss: 0.01276124255447131
    num_agent_steps_sampled: 1235000
    num_agent_steps_trained: 1235000
    num_steps_sampled: 1235000
    num_steps_trained: 1235000
  iterations_since_restore: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1235,30582.7,1235000,-0.16,0,-12,349.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1236000
  custom_metrics: {}
  date: 2021-10-09_06-54-33
  done: false
  episode_len_mean: 349.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3442
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6135037899017335
          entropy_coeff: 0.009999999999999998
          kl: 0.009746594346672863
          policy_loss: -0.08670570384711027
          total_loss: -0.08273984406971269
          vf_explained_var: -0.8132295608520508
          vf_loss: 0.008847448291877906
    num_agent_steps_sampled: 1236000
    num_agent_steps_trained: 1236000
    num_steps_sampled: 1236000
    num_steps_trained: 1236000
  iterations_since_restore: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1236,30604.6,1236000,-0.16,0,-12,349.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1237000
  custom_metrics: {}
  date: 2021-10-09_06-54-55
  done: false
  episode_len_mean: 350.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3445
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6536834902233548
          entropy_coeff: 0.009999999999999998
          kl: 0.007069853307979359
          policy_loss: -0.06404611195127169
          total_loss: -0.06856871189342605
          vf_explained_var: -0.3295273184776306
          vf_loss: 0.003851359834273656
    num_agent_steps_sampled: 1237000
    num_agent_steps_trained: 1237000
    num_steps_sampled: 1237000
    num_steps_trained: 1237000
  iterations_since_restore: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1237,30626.7,1237000,-0.16,0,-12,350.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1238000
  custom_metrics: {}
  date: 2021-10-09_06-55-20
  done: false
  episode_len_mean: 350.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3448
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7810677660836114
          entropy_coeff: 0.009999999999999998
          kl: 0.011761296191114627
          policy_loss: -0.09711397447519832
          total_loss: -0.0982268779642052
          vf_explained_var: -0.1111801490187645
          vf_loss: 0.0031181461746907896
    num_agent_steps_sampled: 1238000
    num_agent_steps_trained: 1238000
    num_steps_sampled: 1238000
    num_steps_trained: 1238000
  iterations_since_restore: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1238,30650.8,1238000,-0.16,0,-12,350.13




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1239000
  custom_metrics: {}
  date: 2021-10-09_06-56-00
  done: false
  episode_len_mean: 350.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 3450
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8023683574464586
          entropy_coeff: 0.009999999999999998
          kl: 0.00925945231242476
          policy_loss: 0.008810519592629538
          total_loss: 0.0027599954667190713
          vf_explained_var: -0.9604050517082214
          vf_loss: 0.0012821678919458968
    num_agent_steps_sampled: 1239000
    num_agent_steps_trained: 1239000
    num_steps_sampled: 1239000
    num_steps_trained: 1239000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1239,30691.2,1239000,-0.16,0,-12,350.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1240000
  custom_metrics: {}
  date: 2021-10-09_06-56-23
  done: false
  episode_len_mean: 351.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3453
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.744767435391744
          entropy_coeff: 0.009999999999999998
          kl: 0.010143862799319227
          policy_loss: -0.03332232165978187
          total_loss: -0.03800998322872652
          vf_explained_var: -0.6873782873153687
          vf_loss: 0.0010478802633264827
    num_agent_steps_sampled: 1240000
    num_agent_steps_trained: 1240000
    num_steps_sampled: 1240000
    num_steps_trained: 1240000
  iterations_since_restore: 124

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1240,30714.1,1240000,-0.16,0,-12,351.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1241000
  custom_metrics: {}
  date: 2021-10-09_06-56-48
  done: false
  episode_len_mean: 351.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3456
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6880050208833484
          entropy_coeff: 0.009999999999999998
          kl: 0.010906214363517582
          policy_loss: -0.052905918657779695
          total_loss: -0.05623398543232017
          vf_explained_var: -0.856585681438446
          vf_loss: 0.0009596337784185178
    num_agent_steps_sampled: 1241000
    num_agent_steps_trained: 1241000
    num_steps_sampled: 1241000
    num_steps_trained: 1241000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1241,30739.1,1241000,-0.16,0,-12,351.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1242000
  custom_metrics: {}
  date: 2021-10-09_06-57-12
  done: false
  episode_len_mean: 352.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3459
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.566752134429084
          entropy_coeff: 0.009999999999999998
          kl: 0.009374423270293994
          policy_loss: -0.11038306198186344
          total_loss: -0.11432862364583546
          vf_explained_var: -0.8304812908172607
          vf_loss: 0.0008982205789329277
    num_agent_steps_sampled: 1242000
    num_agent_steps_trained: 1242000
    num_steps_sampled: 1242000
    num_steps_trained: 1242000
  iterations_since_restore: 124

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1242,30763.4,1242000,-0.16,0,-12,352.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1243000
  custom_metrics: {}
  date: 2021-10-09_06-57-36
  done: false
  episode_len_mean: 352.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3462
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6956753306918675
          entropy_coeff: 0.009999999999999998
          kl: 0.010322158408508978
          policy_loss: -0.08628695375389522
          total_loss: -0.09034622154302067
          vf_explained_var: -0.9803797602653503
          vf_loss: 0.0009794883889197889
    num_agent_steps_sampled: 1243000
    num_agent_steps_trained: 1243000
    num_steps_sampled: 1243000
    num_steps_trained: 1243000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1243,30787.1,1243000,-0.16,0,-12,352.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1244000
  custom_metrics: {}
  date: 2021-10-09_06-57-59
  done: false
  episode_len_mean: 353.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3465
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.662272540728251
          entropy_coeff: 0.009999999999999998
          kl: 0.01085433886147764
          policy_loss: -0.16464914112455314
          total_loss: -0.16789583447906706
          vf_explained_var: -0.5330861806869507
          vf_loss: 0.00084357886969681
    num_agent_steps_sampled: 1244000
    num_agent_steps_trained: 1244000
    num_steps_sampled: 1244000
    num_steps_trained: 1244000
  iterations_since_restore: 1244
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1244,30810.6,1244000,-0.16,0,-12,353.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1245000
  custom_metrics: {}
  date: 2021-10-09_06-58-25
  done: false
  episode_len_mean: 351.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3468
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6135507702827454
          entropy_coeff: 0.009999999999999998
          kl: 0.011423403124718866
          policy_loss: -0.11994138122018841
          total_loss: -0.12177360237886509
          vf_explained_var: -0.7904387712478638
          vf_loss: 0.0011137906322902483
    num_agent_steps_sampled: 1245000
    num_agent_steps_trained: 1245000
    num_steps_sampled: 1245000
    num_steps_trained: 1245000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1245,30836.6,1245000,-0.16,0,-12,351.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1246000
  custom_metrics: {}
  date: 2021-10-09_06-58-49
  done: false
  episode_len_mean: 350.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3471
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6874288943078783
          entropy_coeff: 0.009999999999999998
          kl: 0.009586259171645375
          policy_loss: -0.09616261877947384
          total_loss: -0.10140760033908817
          vf_explained_var: -0.7928067445755005
          vf_loss: 0.0005609814132589639
    num_agent_steps_sampled: 1246000
    num_agent_steps_trained: 1246000
    num_steps_sampled: 1246000
    num_steps_trained: 1246000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1246,30860.1,1246000,-0.16,0,-12,350.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1247000
  custom_metrics: {}
  date: 2021-10-09_06-59-15
  done: false
  episode_len_mean: 351.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3474
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.4358241981930202
          entropy_coeff: 0.009999999999999998
          kl: 0.007752370388054371
          policy_loss: -0.08682176673577892
          total_loss: -0.09152178871962759
          vf_explained_var: 0.2591063976287842
          vf_loss: 0.0007073099521221593
    num_agent_steps_sampled: 1247000
    num_agent_steps_trained: 1247000
    num_steps_sampled: 1247000
    num_steps_trained: 1247000
  iterations_since_restore: 124

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1247,30886.2,1247000,-0.16,0,-12,351.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1248000
  custom_metrics: {}
  date: 2021-10-09_06-59-39
  done: false
  episode_len_mean: 352.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3477
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.769026305940416
          entropy_coeff: 0.009999999999999998
          kl: 0.010209396463584881
          policy_loss: -0.10880022967855135
          total_loss: -0.11386327168179883
          vf_explained_var: -0.7819318771362305
          vf_loss: 0.0008394225293563472
    num_agent_steps_sampled: 1248000
    num_agent_steps_trained: 1248000
    num_steps_sampled: 1248000
    num_steps_trained: 1248000
  iterations_since_restore: 1248

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1248,30909.9,1248000,-0.16,0,-12,352.2




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1249000
  custom_metrics: {}
  date: 2021-10-09_07-00-20
  done: false
  episode_len_mean: 350.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3480
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5520073162184822
          entropy_coeff: 0.009999999999999998
          kl: 0.009943975256628138
          policy_loss: -0.12403017137613562
          total_loss: -0.12730895102851922
          vf_explained_var: -0.19558614492416382
          vf_loss: 0.0007599481016061165
    num_agent_steps_sampled: 1249000
    num_agent_steps_trained: 1249000
    num_steps_sampled: 1249000
    num_steps_trained: 1249000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1249,30951.1,1249000,-0.16,0,-12,350.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1250000
  custom_metrics: {}
  date: 2021-10-09_07-00-43
  done: false
  episode_len_mean: 351.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 3482
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7803514454099867
          entropy_coeff: 0.009999999999999998
          kl: 0.009104238306457704
          policy_loss: -0.10874380535549588
          total_loss: -0.11552509057025115
          vf_explained_var: -0.2732808589935303
          vf_loss: 0.0005104477691929788
    num_agent_steps_sampled: 1250000
    num_agent_steps_trained: 1250000
    num_steps_sampled: 1250000
    num_steps_trained: 1250000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1250,30973.5,1250000,-0.16,0,-12,351.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1251000
  custom_metrics: {}
  date: 2021-10-09_07-01-08
  done: false
  episode_len_mean: 351.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3485
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7910843729972838
          entropy_coeff: 0.009999999999999998
          kl: 0.008811949904291004
          policy_loss: -0.08741458073879281
          total_loss: -0.09469894014505877
          vf_explained_var: -0.4039558470249176
          vf_loss: 0.0004521790597613694
    num_agent_steps_sampled: 1251000
    num_agent_steps_trained: 1251000
    num_steps_sampled: 1251000
    num_steps_trained: 1251000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1251,30998.7,1251000,-0.16,0,-12,351.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1252000
  custom_metrics: {}
  date: 2021-10-09_07-01-32
  done: false
  episode_len_mean: 351.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3488
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.3111688766214582
          entropy_coeff: 0.009999999999999998
          kl: 0.008503227184638994
          policy_loss: -0.07852543331682682
          total_loss: -0.08140294601519903
          vf_explained_var: -0.7292392253875732
          vf_loss: 0.00041632669688422335
    num_agent_steps_sampled: 1252000
    num_agent_steps_trained: 1252000
    num_steps_sampled: 1252000
    num_steps_trained: 1252000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1252,31023.4,1252000,-0.16,0,-12,351.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1253000
  custom_metrics: {}
  date: 2021-10-09_07-01-56
  done: false
  episode_len_mean: 352.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3491
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7966782622867161
          entropy_coeff: 0.009999999999999998
          kl: 0.010098839898752873
          policy_loss: -0.07184971773789989
          total_loss: -0.07732409404383765
          vf_explained_var: -0.773314893245697
          vf_loss: 0.0008322550401013965
    num_agent_steps_sampled: 1253000
    num_agent_steps_trained: 1253000
    num_steps_sampled: 1253000
    num_steps_trained: 1253000
  iterations_since_restore: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1253,31047.4,1253000,-0.16,0,-12,352.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1254000
  custom_metrics: {}
  date: 2021-10-09_07-02-18
  done: false
  episode_len_mean: 353.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 3493
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8968851778242324
          entropy_coeff: 0.009999999999999998
          kl: 0.008778164100947262
          policy_loss: -0.07129587257901827
          total_loss: -0.0796817740218507
          vf_explained_var: -0.9501108527183533
          vf_loss: 0.0004476551580915435
    num_agent_steps_sampled: 1254000
    num_agent_steps_trained: 1254000
    num_steps_sampled: 1254000
    num_steps_trained: 1254000
  iterations_since_restore: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1254,31068.7,1254000,-0.16,0,-12,353.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1255000
  custom_metrics: {}
  date: 2021-10-09_07-02-41
  done: false
  episode_len_mean: 353.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3496
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7951084719763861
          entropy_coeff: 0.009999999999999998
          kl: 0.01073664223656551
          policy_loss: -0.0883735484133164
          total_loss: -0.09363330557114548
          vf_explained_var: -0.7125953435897827
          vf_loss: 0.0002947714134481632
    num_agent_steps_sampled: 1255000
    num_agent_steps_trained: 1255000
    num_steps_sampled: 1255000
    num_steps_trained: 1255000
  iterations_since_restore: 1255


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1255,31091.8,1255000,-0.16,0,-12,353.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1256000
  custom_metrics: {}
  date: 2021-10-09_07-03-07
  done: false
  episode_len_mean: 352.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3499
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.4009785638915169
          entropy_coeff: 0.009999999999999998
          kl: 0.009041777507185343
          policy_loss: -0.11605644327484899
          total_loss: -0.11924242882264985
          vf_explained_var: -0.250447154045105
          vf_loss: 0.0003841364709337035
    num_agent_steps_sampled: 1256000
    num_agent_steps_trained: 1256000
    num_steps_sampled: 1256000
    num_steps_trained: 1256000
  iterations_since_restore: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1256,31117.6,1256000,-0.16,0,-12,352.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1257000
  custom_metrics: {}
  date: 2021-10-09_07-03-30
  done: false
  episode_len_mean: 353.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3502
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6209313816494413
          entropy_coeff: 0.009999999999999998
          kl: 0.010620177475838292
          policy_loss: -0.06055562723841932
          total_loss: -0.0638381313946512
          vf_explained_var: -0.7067265510559082
          vf_loss: 0.0006647195327483739
    num_agent_steps_sampled: 1257000
    num_agent_steps_trained: 1257000
    num_steps_sampled: 1257000
    num_steps_trained: 1257000
  iterations_since_restore: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1257,31140.5,1257000,-0.16,0,-12,353.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1258000
  custom_metrics: {}
  date: 2021-10-09_07-03-53
  done: false
  episode_len_mean: 355.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3505
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8615548173586527
          entropy_coeff: 0.009999999999999998
          kl: 0.011700368050411688
          policy_loss: -0.11978077089620962
          total_loss: -0.12451475068098969
          vf_explained_var: -0.770691454410553
          vf_loss: 0.0003722892922168184
    num_agent_steps_sampled: 1258000
    num_agent_steps_trained: 1258000
    num_steps_sampled: 1258000
    num_steps_trained: 1258000
  iterations_since_restore: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1258,31164.1,1258000,-0.16,0,-12,355.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1259000
  custom_metrics: {}
  date: 2021-10-09_07-04-17
  done: false
  episode_len_mean: 354.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3508
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6035817543665567
          entropy_coeff: 0.009999999999999998
          kl: 0.010045374092066971
          policy_loss: -0.10417912892169423
          total_loss: -0.10837648316389985
          vf_explained_var: -0.6095645427703857
          vf_loss: 0.00024004231486792884
    num_agent_steps_sampled: 1259000
    num_agent_steps_trained: 1259000
    num_steps_sampled: 1259000
    num_steps_trained: 1259000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1259,31187.9,1259000,-0.16,0,-12,354.21




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1260000
  custom_metrics: {}
  date: 2021-10-09_07-05-01
  done: false
  episode_len_mean: 353.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3511
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.4897807161013286
          entropy_coeff: 0.009999999999999998
          kl: 0.009023120332902026
          policy_loss: -0.06154598235670063
          total_loss: -0.06576609915743271
          vf_explained_var: -0.613830029964447
          vf_loss: 0.0002595655835951523
    num_agent_steps_sampled: 1260000
    num_agent_steps_trained: 1260000
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
  iterations_since_restore: 126

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1260,31232,1260000,-0.16,0,-12,353.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1261000
  custom_metrics: {}
  date: 2021-10-09_07-05-22
  done: false
  episode_len_mean: 353.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 3513
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.781473982334137
          entropy_coeff: 0.009999999999999998
          kl: 0.010713315254747653
          policy_loss: -0.10705364147822062
          total_loss: -0.11213687819739183
          vf_explained_var: -0.9491878151893616
          vf_loss: 0.00036187748014021457
    num_agent_steps_sampled: 1261000
    num_agent_steps_trained: 1261000
    num_steps_sampled: 1261000
    num_steps_trained: 1261000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1261,31252.6,1261000,-0.16,0,-12,353.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1262000
  custom_metrics: {}
  date: 2021-10-09_07-05-45
  done: false
  episode_len_mean: 354.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3516
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7108498202429878
          entropy_coeff: 0.009999999999999998
          kl: 0.012468944467263536
          policy_loss: -0.0984481199334065
          total_loss: -0.10088416532509857
          vf_explained_var: -0.6149289608001709
          vf_loss: 0.0002757745584757585
    num_agent_steps_sampled: 1262000
    num_agent_steps_trained: 1262000
    num_steps_sampled: 1262000
    num_steps_trained: 1262000
  iterations_since_restore: 1262

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1262,31275.9,1262000,-0.04,0,-4,354.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1263000
  custom_metrics: {}
  date: 2021-10-09_07-06-12
  done: false
  episode_len_mean: 352.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3519
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.490778422355652
          entropy_coeff: 0.009999999999999998
          kl: 0.007154826138500801
          policy_loss: -0.034917557446493046
          total_loss: -0.04146287813782692
          vf_explained_var: -0.7575862407684326
          vf_loss: 0.00010148014092313437
    num_agent_steps_sampled: 1263000
    num_agent_steps_trained: 1263000
    num_steps_sampled: 1263000
    num_steps_trained: 1263000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1263,31302.3,1263000,-0.04,0,-4,352.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1264000
  custom_metrics: {}
  date: 2021-10-09_07-06-37
  done: false
  episode_len_mean: 352.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3522
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8325332906511096
          entropy_coeff: 0.009999999999999998
          kl: 0.008726513039699017
          policy_loss: -0.04483738951385021
          total_loss: -0.05289790386127101
          vf_explained_var: -0.7358840703964233
          vf_loss: 0.0001891608694374251
    num_agent_steps_sampled: 1264000
    num_agent_steps_trained: 1264000
    num_steps_sampled: 1264000
    num_steps_trained: 1264000
  iterations_since_restore: 126

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1264,31327.8,1264000,-0.04,0,-4,352.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1265000
  custom_metrics: {}
  date: 2021-10-09_07-06-59
  done: false
  episode_len_mean: 353.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 3524
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8088356521394517
          entropy_coeff: 0.009999999999999998
          kl: 0.010113564165360097
          policy_loss: -0.08068718566662736
          total_loss: -0.08689413105862008
          vf_explained_var: -0.8979176878929138
          vf_loss: 0.00020425916025285714
    num_agent_steps_sampled: 1265000
    num_agent_steps_trained: 1265000
    num_steps_sampled: 1265000
    num_steps_trained: 1265000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1265,31349.7,1265000,-0.04,0,-4,353.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1266000
  custom_metrics: {}
  date: 2021-10-09_07-07-22
  done: false
  episode_len_mean: 354.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3527
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7672617422209846
          entropy_coeff: 0.009999999999999998
          kl: 0.00982593710377946
          policy_loss: -0.12857763858305082
          total_loss: -0.1347414995647139
          vf_explained_var: -1.0
          vf_loss: 0.00016369912894636702
    num_agent_steps_sampled: 1266000
    num_agent_steps_trained: 1266000
    num_steps_sampled: 1266000
    num_steps_trained: 1266000
  iterations_since_restore: 1266
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1266,31372.9,1266000,-0.04,0,-4,354.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1267000
  custom_metrics: {}
  date: 2021-10-09_07-07-42
  done: false
  episode_len_mean: 357.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3529
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7991851422521803
          entropy_coeff: 0.009999999999999998
          kl: 0.01250494072770307
          policy_loss: -0.09480745154950354
          total_loss: -0.09817949943244457
          vf_explained_var: -0.6714677214622498
          vf_loss: 0.0001815621826002219
    num_agent_steps_sampled: 1267000
    num_agent_steps_trained: 1267000
    num_steps_sampled: 1267000
    num_steps_trained: 1267000
  iterations_since_restore: 1267
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1267,31392.8,1267000,0,0,0,357.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1268000
  custom_metrics: {}
  date: 2021-10-09_07-08-05
  done: false
  episode_len_mean: 358.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3532
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6802143798934088
          entropy_coeff: 0.009999999999999998
          kl: 0.008757529654244164
          policy_loss: -0.00042927604582574633
          total_loss: -0.006946634232170052
          vf_explained_var: -0.49820417165756226
          vf_loss: 0.0001733148727604809
    num_agent_steps_sampled: 1268000
    num_agent_steps_trained: 1268000
    num_steps_sampled: 1268000
    num_steps_trained: 1268000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1268,31415.8,1268000,0,0,0,358.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1269000
  custom_metrics: {}
  date: 2021-10-09_07-08-32
  done: false
  episode_len_mean: 356.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3535
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.4332460304101309
          entropy_coeff: 0.009999999999999998
          kl: 0.013227600037811192
          policy_loss: -0.06078995967076884
          total_loss: -0.059685354265901776
          vf_explained_var: 0.11466821283102036
          vf_loss: 0.00016443980008868191
    num_agent_steps_sampled: 1269000
    num_agent_steps_trained: 1269000
    num_steps_sampled: 1269000
    num_steps_trained: 1269000
  iterations_since_restore: 1269

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1269,31442.6,1269000,0,0,0,356.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1270000
  custom_metrics: {}
  date: 2021-10-09_07-08-55
  done: false
  episode_len_mean: 355.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3538
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6851549360487197
          entropy_coeff: 0.009999999999999998
          kl: 0.013453718870192491
          policy_loss: -0.014173435461190011
          total_loss: -0.01526658379783233
          vf_explained_var: -0.3045438826084137
          vf_loss: 0.0002246970564657305
    num_agent_steps_sampled: 1270000
    num_agent_steps_trained: 1270000
    num_steps_sampled: 1270000
    num_steps_trained: 1270000
  iterations_since_restore: 1270


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1270,31465.7,1270000,0,0,0,355.66




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1271000
  custom_metrics: {}
  date: 2021-10-09_07-09-35
  done: false
  episode_len_mean: 355.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3541
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5036447909143236
          entropy_coeff: 0.009999999999999998
          kl: 0.010950875301344986
          policy_loss: -0.07189316008653905
          total_loss: -0.0740649745075239
          vf_explained_var: -0.04662179946899414
          vf_loss: 0.00022071914491890413
    num_agent_steps_sampled: 1271000
    num_agent_steps_trained: 1271000
    num_steps_sampled: 1271000
    num_steps_trained: 1271000
  iterations_since_restore: 1271


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1271,31505.9,1271000,0,0,0,355.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1272000
  custom_metrics: {}
  date: 2021-10-09_07-09-58
  done: false
  episode_len_mean: 355.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3543
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6536928508016797
          entropy_coeff: 0.009999999999999998
          kl: 0.009504791166967744
          policy_loss: -0.12478456592394246
          total_loss: -0.13024238157603477
          vf_explained_var: -0.34099552035331726
          vf_loss: 0.00010485148361314916
    num_agent_steps_sampled: 1272000
    num_agent_steps_trained: 1272000
    num_steps_sampled: 1272000
    num_steps_trained: 1272000
  iterations_since_restore: 1272

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1272,31528.8,1272000,0,0,0,355.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1273000
  custom_metrics: {}
  date: 2021-10-09_07-10-22
  done: false
  episode_len_mean: 355.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3546
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5318263570467632
          entropy_coeff: 0.009999999999999998
          kl: 0.012094958248996097
          policy_loss: -0.06729205285923348
          total_loss: -0.06854236328767406
          vf_explained_var: -0.3844360411167145
          vf_loss: 0.00010307966569800758
    num_agent_steps_sampled: 1273000
    num_agent_steps_trained: 1273000
    num_steps_sampled: 1273000
    num_steps_trained: 1273000
  iterations_since_restore: 1273


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1273,31552.8,1273000,0,0,0,355.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1274000
  custom_metrics: {}
  date: 2021-10-09_07-10-45
  done: false
  episode_len_mean: 355.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3549
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.458247725168864
          entropy_coeff: 0.009999999999999998
          kl: 0.006353378422362333
          policy_loss: -0.15309930079513126
          total_loss: -0.10297621662418048
          vf_explained_var: -0.4587937295436859
          vf_loss: 0.05736993089004601
    num_agent_steps_sampled: 1274000
    num_agent_steps_trained: 1274000
    num_steps_sampled: 1274000
    num_steps_trained: 1274000
  iterations_since_restore: 1274
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1274,31575.5,1274000,-0.08,0,-8,355.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1275000
  custom_metrics: {}
  date: 2021-10-09_07-11-07
  done: false
  episode_len_mean: 355.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3551
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6191472291946412
          entropy_coeff: 0.009999999999999998
          kl: 0.010659151753919517
          policy_loss: -0.10381953455507756
          total_loss: -0.10162482348581155
          vf_explained_var: -0.07989950478076935
          vf_loss: 0.006079094070527289
    num_agent_steps_sampled: 1275000
    num_agent_steps_trained: 1275000
    num_steps_sampled: 1275000
    num_steps_trained: 1275000
  iterations_since_restore: 127

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1275,31597.8,1275000,-0.08,0,-8,355.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1276000
  custom_metrics: {}
  date: 2021-10-09_07-11-29
  done: false
  episode_len_mean: 357.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3554
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7569994529088337
          entropy_coeff: 0.009999999999999998
          kl: 0.010941948268690303
          policy_loss: -0.11328564133081172
          total_loss: -0.11439047718627585
          vf_explained_var: -0.47695600986480713
          vf_loss: 0.0038315516676650277
    num_agent_steps_sampled: 1276000
    num_agent_steps_trained: 1276000
    num_steps_sampled: 1276000
    num_steps_trained: 1276000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1276,31619.1,1276000,-0.08,0,-8,357.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1277000
  custom_metrics: {}
  date: 2021-10-09_07-11-52
  done: false
  episode_len_mean: 357.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3557
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6685772882567511
          entropy_coeff: 0.009999999999999998
          kl: 0.010246658185259723
          policy_loss: -0.10657349189536439
          total_loss: -0.10924730336086617
          vf_explained_var: -0.6085992455482483
          vf_loss: 0.0021811391511518093
    num_agent_steps_sampled: 1277000
    num_agent_steps_trained: 1277000
    num_steps_sampled: 1277000
    num_steps_trained: 1277000
  iterations_since_restore: 127

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1277,31642.7,1277000,-0.08,0,-8,357.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1278000
  custom_metrics: {}
  date: 2021-10-09_07-12-16
  done: false
  episode_len_mean: 358.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3560
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.586888407336341
          entropy_coeff: 0.009999999999999998
          kl: 0.01190879235262076
          policy_loss: -0.0403278907140096
          total_loss: -0.040355495363473895
          vf_explained_var: -0.4601297080516815
          vf_loss: 0.0020913497609500254
    num_agent_steps_sampled: 1278000
    num_agent_steps_trained: 1278000
    num_steps_sampled: 1278000
    num_steps_trained: 1278000
  iterations_since_restore: 1278


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1278,31666,1278000,-0.08,0,-8,358.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1279000
  custom_metrics: {}
  date: 2021-10-09_07-12-41
  done: false
  episode_len_mean: 357.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3563
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.3855028033256531
          entropy_coeff: 0.009999999999999998
          kl: 0.007957062774598475
          policy_loss: -0.02353153112861845
          total_loss: -0.02734836255096727
          vf_explained_var: -0.12600047886371613
          vf_loss: 0.0008509483343611161
    num_agent_steps_sampled: 1279000
    num_agent_steps_trained: 1279000
    num_steps_sampled: 1279000
    num_steps_trained: 1279000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1279,31691.2,1279000,-0.08,0,-8,357.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1280000
  custom_metrics: {}
  date: 2021-10-09_07-13-03
  done: false
  episode_len_mean: 358.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3565
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6443329215049745
          entropy_coeff: 0.009999999999999998
          kl: 0.012372425148144838
          policy_loss: -0.09100000322279003
          total_loss: -0.09162454642355441
          vf_explained_var: -0.5351088047027588
          vf_loss: 0.0015335457373617423
    num_agent_steps_sampled: 1280000
    num_agent_steps_trained: 1280000
    num_steps_sampled: 1280000
    num_steps_trained: 1280000
  iterations_since_restore: 128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1280,31713.7,1280000,-0.08,0,-8,358.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1281000
  custom_metrics: {}
  date: 2021-10-09_07-13-24
  done: false
  episode_len_mean: 360.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3568
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6973279025819568
          entropy_coeff: 0.009999999999999998
          kl: 0.009557958192553853
          policy_loss: -0.0888112765426437
          total_loss: -0.09347298060440355
          vf_explained_var: -0.7477157711982727
          vf_loss: 0.001275925011982003
    num_agent_steps_sampled: 1281000
    num_agent_steps_trained: 1281000
    num_steps_sampled: 1281000
    num_steps_trained: 1281000
  iterations_since_restore: 1281


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1281,31734.5,1281000,-0.08,0,-8,360.37




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1282000
  custom_metrics: {}
  date: 2021-10-09_07-14-01
  done: false
  episode_len_mean: 361.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3570
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7218888454967074
          entropy_coeff: 0.009999999999999998
          kl: 0.010632884961432273
          policy_loss: -0.06404139565096961
          total_loss: -0.06806682801494995
          vf_explained_var: -0.9655325412750244
          vf_loss: 0.0009166958353792628
    num_agent_steps_sampled: 1282000
    num_agent_steps_trained: 1282000
    num_steps_sampled: 1282000
    num_steps_trained: 1282000
  iterations_since_restore: 128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1282,31771.5,1282000,-0.08,0,-8,361.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1283000
  custom_metrics: {}
  date: 2021-10-09_07-14-22
  done: false
  episode_len_mean: 363.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3573
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.582024868329366
          entropy_coeff: 0.009999999999999998
          kl: 0.009460214712814422
          policy_loss: -0.04376704171299935
          total_loss: -0.04805466553403272
          vf_explained_var: -0.8540145754814148
          vf_loss: 0.0006098306487223858
    num_agent_steps_sampled: 1283000
    num_agent_steps_trained: 1283000
    num_steps_sampled: 1283000
    num_steps_trained: 1283000
  iterations_since_restore: 1283


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1283,31792.1,1283000,-0.08,0,-8,363.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1284000
  custom_metrics: {}
  date: 2021-10-09_07-14-45
  done: false
  episode_len_mean: 364.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3575
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.870975869231754
          entropy_coeff: 0.009999999999999998
          kl: 0.008409084216937639
          policy_loss: -0.04717946280207899
          total_loss: -0.05558624791188373
          vf_explained_var: -0.9727808833122253
          vf_loss: 0.000593818843157755
    num_agent_steps_sampled: 1284000
    num_agent_steps_trained: 1284000
    num_steps_sampled: 1284000
    num_steps_trained: 1284000
  iterations_since_restore: 1284


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1284,31815.8,1284000,-0.08,0,-8,364.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1285000
  custom_metrics: {}
  date: 2021-10-09_07-15-08
  done: false
  episode_len_mean: 364.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3578
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5927292002571953
          entropy_coeff: 0.009999999999999998
          kl: 0.008950820421172864
          policy_loss: -0.10528360911541515
          total_loss: -0.11041021131806904
          vf_explained_var: -0.37811583280563354
          vf_loss: 0.0004660466740865053
    num_agent_steps_sampled: 1285000
    num_agent_steps_trained: 1285000
    num_steps_sampled: 1285000
    num_steps_trained: 1285000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1285,31838.2,1285000,-0.08,0,-8,364.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1286000
  custom_metrics: {}
  date: 2021-10-09_07-15-29
  done: false
  episode_len_mean: 366.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3580
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6779418640666537
          entropy_coeff: 0.009999999999999998
          kl: 0.0082330347067001
          policy_loss: -0.061703302255935136
          total_loss: -0.06872376745773687
          vf_explained_var: -0.661466658115387
          vf_loss: 0.00025306635094845355
    num_agent_steps_sampled: 1286000
    num_agent_steps_trained: 1286000
    num_steps_sampled: 1286000
    num_steps_trained: 1286000
  iterations_since_restore: 1286

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1286,31859.6,1286000,-0.08,0,-8,366.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1287000
  custom_metrics: {}
  date: 2021-10-09_07-15-54
  done: false
  episode_len_mean: 365.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3583
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7606729070345561
          entropy_coeff: 0.009999999999999998
          kl: 0.0078106152612865765
          policy_loss: -0.09215622002051936
          total_loss: -0.10050204793612162
          vf_explained_var: -0.713527500629425
          vf_loss: 0.00024274314265413624
    num_agent_steps_sampled: 1287000
    num_agent_steps_trained: 1287000
    num_steps_sampled: 1287000
    num_steps_trained: 1287000
  iterations_since_restore: 128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1287,31884.2,1287000,-0.08,0,-8,365.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1288000
  custom_metrics: {}
  date: 2021-10-09_07-16-15
  done: false
  episode_len_mean: 367.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3586
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6744914968808493
          entropy_coeff: 0.009999999999999998
          kl: 0.009782149757413608
          policy_loss: -0.04278924448622597
          total_loss: -0.04765988650421302
          vf_explained_var: -0.24581719934940338
          vf_loss: 0.000579771145971285
    num_agent_steps_sampled: 1288000
    num_agent_steps_trained: 1288000
    num_steps_sampled: 1288000
    num_steps_trained: 1288000
  iterations_since_restore: 128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1288,31905.4,1288000,-0.08,0,-8,367.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1289000
  custom_metrics: {}
  date: 2021-10-09_07-16-39
  done: false
  episode_len_mean: 368.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3589
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7231887737909952
          entropy_coeff: 0.009999999999999998
          kl: 0.0099185699294375
          policy_loss: -0.0601045302218861
          total_loss: -0.0655055977197157
          vf_explained_var: -0.5945062637329102
          vf_loss: 0.00037881018167051174
    num_agent_steps_sampled: 1289000
    num_agent_steps_trained: 1289000
    num_steps_sampled: 1289000
    num_steps_trained: 1289000
  iterations_since_restore: 1289
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1289,31929,1289000,-0.08,0,-8,368.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1290000
  custom_metrics: {}
  date: 2021-10-09_07-17-01
  done: false
  episode_len_mean: 369.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3591
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.683842623233795
          entropy_coeff: 0.009999999999999998
          kl: 0.009811030263914075
          policy_loss: -0.0008838807129197651
          total_loss: -0.00601073768403795
          vf_explained_var: -0.38780033588409424
          vf_loss: 0.0003837225850374024
    num_agent_steps_sampled: 1290000
    num_agent_steps_trained: 1290000
    num_steps_sampled: 1290000
    num_steps_trained: 1290000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1290,31951.2,1290000,-0.08,0,-8,369.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1291000
  custom_metrics: {}
  date: 2021-10-09_07-17-23
  done: false
  episode_len_mean: 369.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3594
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.9219914886686538
          entropy_coeff: 0.009999999999999998
          kl: 0.00929015034352151
          policy_loss: -0.07141678006284767
          total_loss: -0.0796541576170259
          vf_explained_var: -0.25438636541366577
          vf_loss: 0.0002561008722598975
    num_agent_steps_sampled: 1291000
    num_agent_steps_trained: 1291000
    num_steps_sampled: 1291000
    num_steps_trained: 1291000
  iterations_since_restore: 1291

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1291,31973.5,1291000,-0.08,0,-8,369.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1292000
  custom_metrics: {}
  date: 2021-10-09_07-17-44
  done: false
  episode_len_mean: 370.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3596
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7110390212800768
          entropy_coeff: 0.009999999999999998
          kl: 0.008790854275204572
          policy_loss: -0.06827266665382517
          total_loss: -0.07507514336870777
          vf_explained_var: -0.486650675535202
          vf_loss: 0.00015796816514921375
    num_agent_steps_sampled: 1292000
    num_agent_steps_trained: 1292000
    num_steps_sampled: 1292000
    num_steps_trained: 1292000
  iterations_since_restore: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1292,31994,1292000,-0.08,0,-8,370.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1293000
  custom_metrics: {}
  date: 2021-10-09_07-18-06
  done: false
  episode_len_mean: 372.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3599
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.9116845197147794
          entropy_coeff: 0.009999999999999998
          kl: 0.009783339643730432
          policy_loss: -0.0594598937779665
          total_loss: -0.06709233899083403
          vf_explained_var: -0.8665064573287964
          vf_loss: 0.00018852375497873354
    num_agent_steps_sampled: 1293000
    num_agent_steps_trained: 1293000
    num_steps_sampled: 1293000
    num_steps_trained: 1293000
  iterations_since_restore: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1293,32016.2,1293000,-0.08,0,-8,372.58




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1294000
  custom_metrics: {}
  date: 2021-10-09_07-18-46
  done: false
  episode_len_mean: 373.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3601
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7897206544876099
          entropy_coeff: 0.009999999999999998
          kl: 0.010586015962537183
          policy_loss: -0.06098808786935277
          total_loss: -0.06637756356762516
          vf_explained_var: -0.9962795972824097
          vf_loss: 0.00028508512979088764
    num_agent_steps_sampled: 1294000
    num_agent_steps_trained: 1294000
    num_steps_sampled: 1294000
    num_steps_trained: 1294000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1294,32056.2,1294000,-0.08,0,-8,373.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1295000
  custom_metrics: {}
  date: 2021-10-09_07-19-11
  done: false
  episode_len_mean: 372.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3604
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6547443588574728
          entropy_coeff: 0.009999999999999998
          kl: 0.009400068773406763
          policy_loss: -0.049153220819102394
          total_loss: -0.054688336410456236
          vf_explained_var: -0.21570053696632385
          vf_loss: 0.00015897998382570223
    num_agent_steps_sampled: 1295000
    num_agent_steps_trained: 1295000
    num_steps_sampled: 1295000
    num_steps_trained: 1295000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1295,32080.6,1295000,-0.08,0,-8,372.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1296000
  custom_metrics: {}
  date: 2021-10-09_07-19-34
  done: false
  episode_len_mean: 373.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3607
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8337866173850166
          entropy_coeff: 0.009999999999999998
          kl: 0.01008717849056505
          policy_loss: -0.06941373106092215
          total_loss: -0.07591985244717862
          vf_explained_var: -0.8705345988273621
          vf_loss: 0.00018505582807443313
    num_agent_steps_sampled: 1296000
    num_agent_steps_trained: 1296000
    num_steps_sampled: 1296000
    num_steps_trained: 1296000
  iterations_since_restore: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1296,32103.8,1296000,-0.08,0,-8,373.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1297000
  custom_metrics: {}
  date: 2021-10-09_07-19-56
  done: false
  episode_len_mean: 374.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3609
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.787684084309472
          entropy_coeff: 0.009999999999999998
          kl: 0.009752034131042819
          policy_loss: -0.09860144007123178
          total_loss: -0.10503547239220805
          vf_explained_var: -0.7500527501106262
          vf_loss: 0.00018308086049122115
    num_agent_steps_sampled: 1297000
    num_agent_steps_trained: 1297000
    num_steps_sampled: 1297000
    num_steps_trained: 1297000
  iterations_since_restore: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1297,32125.7,1297000,-0.08,0,-8,374.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1298000
  custom_metrics: {}
  date: 2021-10-09_07-20-17
  done: false
  episode_len_mean: 375.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3612
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7800900724199082
          entropy_coeff: 0.009999999999999998
          kl: 0.013632820466960993
          policy_loss: -0.0940185592820247
          total_loss: -0.09574873857200146
          vf_explained_var: -0.4889248013496399
          vf_loss: 0.00033022528934654674
    num_agent_steps_sampled: 1298000
    num_agent_steps_trained: 1298000
    num_steps_sampled: 1298000
    num_steps_trained: 1298000
  iterations_since_restore: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1298,32147.2,1298000,-0.08,0,-8,375.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1299000
  custom_metrics: {}
  date: 2021-10-09_07-20-42
  done: false
  episode_len_mean: 374.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3615
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5788100136650933
          entropy_coeff: 0.009999999999999998
          kl: 0.009623033443259309
          policy_loss: -0.06629615964161026
          total_loss: -0.07081554416153166
          vf_explained_var: 0.04250389337539673
          vf_loss: 0.00015793479194851695
    num_agent_steps_sampled: 1299000
    num_agent_steps_trained: 1299000
    num_steps_sampled: 1299000
    num_steps_trained: 1299000
  iterations_since_restore: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1299,32171.7,1299000,-0.08,0,-8,374.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1300000
  custom_metrics: {}
  date: 2021-10-09_07-21-05
  done: false
  episode_len_mean: 374.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3617
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7561236421267192
          entropy_coeff: 0.009999999999999998
          kl: 0.009503747924694755
          policy_loss: -0.04214294482436445
          total_loss: -0.048604991473257544
          vf_explained_var: -0.19544687867164612
          vf_loss: 0.00012613554891787417
    num_agent_steps_sampled: 1300000
    num_agent_steps_trained: 1300000
    num_steps_sampled: 1300000
    num_steps_trained: 1300000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1300,32195,1300000,-0.08,0,-8,374.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1301000
  custom_metrics: {}
  date: 2021-10-09_07-21-26
  done: false
  episode_len_mean: 376.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3620
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7994579275449116
          entropy_coeff: 0.009999999999999998
          kl: 0.011099458489376761
          policy_loss: -0.08593529396586948
          total_loss: -0.09095205954379505
          vf_explained_var: -0.6342238783836365
          vf_loss: 0.0001623465209074008
    num_agent_steps_sampled: 1301000
    num_agent_steps_trained: 1301000
    num_steps_sampled: 1301000
    num_steps_trained: 1301000
  iterations_since_restore: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1301,32216,1301000,-0.08,0,-8,376.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1302000
  custom_metrics: {}
  date: 2021-10-09_07-21-49
  done: false
  episode_len_mean: 377.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3623
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.904247808456421
          entropy_coeff: 0.009999999999999998
          kl: 0.008753210072823907
          policy_loss: -0.06365269617074065
          total_loss: -0.07250849136875735
          vf_explained_var: -1.0
          vf_loss: 8.019887738757663e-05
    num_agent_steps_sampled: 1302000
    num_agent_steps_trained: 1302000
    num_steps_sampled: 1302000
    num_steps_trained: 1302000
  iterations_since_restore: 1302
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1302,32238.9,1302000,-0.08,0,-8,377.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1303000
  custom_metrics: {}
  date: 2021-10-09_07-22-11
  done: false
  episode_len_mean: 377.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3625
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.851169302728441
          entropy_coeff: 0.009999999999999998
          kl: 0.010589494307459566
          policy_loss: -0.11231688859148158
          total_loss: -0.11847481797966693
          vf_explained_var: -0.755305826663971
          vf_loss: 0.00012710197285438577
    num_agent_steps_sampled: 1303000
    num_agent_steps_trained: 1303000
    num_steps_sampled: 1303000
    num_steps_trained: 1303000
  iterations_since_restore: 1303

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1303,32261,1303000,-0.08,0,-8,377.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1304000
  custom_metrics: {}
  date: 2021-10-09_07-22-33
  done: false
  episode_len_mean: 377.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3628
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8324684315257602
          entropy_coeff: 0.009999999999999998
          kl: 0.008713703854511657
          policy_loss: -0.09760526998175514
          total_loss: -0.1057998572372728
          vf_explained_var: -0.46424487233161926
          vf_loss: 6.922706262331404e-05
    num_agent_steps_sampled: 1304000
    num_agent_steps_trained: 1304000
    num_steps_sampled: 1304000
    num_steps_trained: 1304000
  iterations_since_restore: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1304,32283.1,1304000,-0.08,0,-8,377.54




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1305000
  custom_metrics: {}
  date: 2021-10-09_07-23-15
  done: false
  episode_len_mean: 374.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3631
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.816607301765018
          entropy_coeff: 0.009999999999999998
          kl: 0.007757133515765899
          policy_loss: -0.04964347038831976
          total_loss: -0.058808420495026645
          vf_explained_var: -0.7638091444969177
          vf_loss: 4.471305902471714e-05
    num_agent_steps_sampled: 1305000
    num_agent_steps_trained: 1305000
    num_steps_sampled: 1305000
    num_steps_trained: 1305000
  iterations_since_restore: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1305,32324.5,1305000,-0.08,0,-8,374.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1306000
  custom_metrics: {}
  date: 2021-10-09_07-23-37
  done: false
  episode_len_mean: 375.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3633
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6978338850869072
          entropy_coeff: 0.009999999999999998
          kl: 0.010501787957724476
          policy_loss: -0.06957264447377788
          total_loss: -0.07433419140676657
          vf_explained_var: -0.6725415587425232
          vf_loss: 9.139511930698064e-05
    num_agent_steps_sampled: 1306000
    num_agent_steps_trained: 1306000
    num_steps_sampled: 1306000
    num_steps_trained: 1306000
  iterations_since_restore: 1306

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1306,32347.2,1306000,-0.08,0,-8,375.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1307000
  custom_metrics: {}
  date: 2021-10-09_07-24-01
  done: false
  episode_len_mean: 376.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3636
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.684989176856147
          entropy_coeff: 0.009999999999999998
          kl: 0.011100318652280252
          policy_loss: -0.09274380943841404
          total_loss: -0.09664352403746711
          vf_explained_var: -0.7345205545425415
          vf_loss: 0.00013371415431417215
    num_agent_steps_sampled: 1307000
    num_agent_steps_trained: 1307000
    num_steps_sampled: 1307000
    num_steps_trained: 1307000
  iterations_since_restore: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1307,32370.9,1307000,-0.08,0,-8,376.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1308000
  custom_metrics: {}
  date: 2021-10-09_07-24-25
  done: false
  episode_len_mean: 374.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3639
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7293973101509943
          entropy_coeff: 0.009999999999999998
          kl: 0.00979850439313383
          policy_loss: -0.0598252270784643
          total_loss: -0.06574399400916364
          vf_explained_var: 0.17452028393745422
          vf_loss: 6.182364286360098e-05
    num_agent_steps_sampled: 1308000
    num_agent_steps_trained: 1308000
    num_steps_sampled: 1308000
    num_steps_trained: 1308000
  iterations_since_restore: 1308


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1308,32395.3,1308000,-0.08,0,-8,374.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1309000
  custom_metrics: {}
  date: 2021-10-09_07-24-46
  done: false
  episode_len_mean: 376.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3642
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7453949199782477
          entropy_coeff: 0.009999999999999998
          kl: 0.008438673626686446
          policy_loss: -0.0625327081626488
          total_loss: -0.07016193022330602
          vf_explained_var: -0.9623810052871704
          vf_loss: 8.140756395328532e-05
    num_agent_steps_sampled: 1309000
    num_agent_steps_trained: 1309000
    num_steps_sampled: 1309000
    num_steps_trained: 1309000
  iterations_since_restore: 1309

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1309,32416.2,1309000,-0.08,0,-8,376.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1310000
  custom_metrics: {}
  date: 2021-10-09_07-25-09
  done: false
  episode_len_mean: 376.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 3644
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.636219765080346
          entropy_coeff: 0.009999999999999998
          kl: 0.008404416351614192
          policy_loss: -0.08532265013911658
          total_loss: -0.09193580194065969
          vf_explained_var: -0.6657971739768982
          vf_loss: 4.5279733967618085e-05
    num_agent_steps_sampled: 1310000
    num_agent_steps_trained: 1310000
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
  iterations_since_restore: 131

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1310,32438.7,1310000,-0.08,0,-8,376.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1311000
  custom_metrics: {}
  date: 2021-10-09_07-25-33
  done: false
  episode_len_mean: 376.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 3647
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7975154863463507
          entropy_coeff: 0.009999999999999998
          kl: 0.007547239500408251
          policy_loss: 0.02992558139893744
          total_loss: 0.02079800296988752
          vf_explained_var: -0.4406988322734833
          vf_loss: 0.00013351135724709213
    num_agent_steps_sampled: 1311000
    num_agent_steps_trained: 1311000
    num_steps_sampled: 1311000
    num_steps_trained: 1311000
  iterations_since_restore: 1311

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1311,32462.3,1311000,-0.08,0,-8,376.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1312000
  custom_metrics: {}
  date: 2021-10-09_07-25-57
  done: false
  episode_len_mean: 376.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3650
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8316812382804022
          entropy_coeff: 0.009999999999999998
          kl: 0.009057532041035246
          policy_loss: -0.06949477264036735
          total_loss: -0.07730032965126965
          vf_explained_var: -0.6596264243125916
          vf_loss: 5.340114245579268e-05
    num_agent_steps_sampled: 1312000
    num_agent_steps_trained: 1312000
    num_steps_sampled: 1312000
    num_steps_trained: 1312000
  iterations_since_restore: 1312
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1312,32487,1312000,0,0,0,376


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1313000
  custom_metrics: {}
  date: 2021-10-09_07-26-19
  done: false
  episode_len_mean: 375.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3653
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.856639289855957
          entropy_coeff: 0.009999999999999998
          kl: 0.010698087866454702
          policy_loss: -0.11495101458082596
          total_loss: -0.12110949386325147
          vf_explained_var: -0.16336733102798462
          vf_loss: 5.5868033652788856e-05
    num_agent_steps_sampled: 1313000
    num_agent_steps_trained: 1313000
    num_steps_sampled: 1313000
    num_steps_trained: 1313000
  iterations_since_restore: 1313


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1313,32508.4,1313000,0,0,0,375.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1314000
  custom_metrics: {}
  date: 2021-10-09_07-26-41
  done: false
  episode_len_mean: 375.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3655
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6265047576692369
          entropy_coeff: 0.009999999999999998
          kl: 0.012781663008915246
          policy_loss: -0.09650089082618554
          total_loss: -0.09791848216619757
          vf_explained_var: -0.1644349992275238
          vf_loss: 8.970730300966857e-05
    num_agent_steps_sampled: 1314000
    num_agent_steps_trained: 1314000
    num_steps_sampled: 1314000
    num_steps_trained: 1314000
  iterations_since_restore: 1314
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1314,32530.8,1314000,0,0,0,375.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1315000
  custom_metrics: {}
  date: 2021-10-09_07-27-06
  done: false
  episode_len_mean: 375.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3658
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5572387244966295
          entropy_coeff: 0.009999999999999998
          kl: 0.008963458669857972
          policy_loss: -0.07496751404056946
          total_loss: -0.08012928041732974
          vf_explained_var: -0.7313192486763
          vf_loss: 6.138708985316851e-05
    num_agent_steps_sampled: 1315000
    num_agent_steps_trained: 1315000
    num_steps_sampled: 1315000
    num_steps_trained: 1315000
  iterations_since_restore: 1315
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1315,32555.5,1315000,0,0,0,375.35




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1316000
  custom_metrics: {}
  date: 2021-10-09_07-27-48
  done: false
  episode_len_mean: 374.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3661
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7021454572677612
          entropy_coeff: 0.009999999999999998
          kl: 0.009856313453762805
          policy_loss: -0.05980071443029576
          total_loss: -0.0653493524218599
          vf_explained_var: -0.3149592876434326
          vf_loss: 9.268929271557782e-05
    num_agent_steps_sampled: 1316000
    num_agent_steps_trained: 1316000
    num_steps_sampled: 1316000
    num_steps_trained: 1316000
  iterations_since_restore: 1316
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1316,32597.7,1316000,0,0,0,374.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1317000
  custom_metrics: {}
  date: 2021-10-09_07-28-11
  done: false
  episode_len_mean: 375.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3664
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6736215922567579
          entropy_coeff: 0.009999999999999998
          kl: 0.009195904009157976
          policy_loss: -0.08149666111502382
          total_loss: -0.08755760863423348
          vf_explained_var: -0.4383583068847656
          vf_loss: 5.7650777449048795e-05
    num_agent_steps_sampled: 1317000
    num_agent_steps_trained: 1317000
    num_steps_sampled: 1317000
    num_steps_trained: 1317000
  iterations_since_restore: 1317
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1317,32620.3,1317000,0,0,0,375.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1318000
  custom_metrics: {}
  date: 2021-10-09_07-28-33
  done: false
  episode_len_mean: 375.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3666
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6747222516271802
          entropy_coeff: 0.009999999999999998
          kl: 0.010821594750167425
          policy_loss: -0.12564086616039277
          total_loss: -0.1298420322438081
          vf_explained_var: -0.34291520714759827
          vf_loss: 5.1409791032913036e-05
    num_agent_steps_sampled: 1318000
    num_agent_steps_trained: 1318000
    num_steps_sampled: 1318000
    num_steps_trained: 1318000
  iterations_since_restore: 1318


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1318,32642.6,1318000,0,0,0,375.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1319000
  custom_metrics: {}
  date: 2021-10-09_07-28-55
  done: false
  episode_len_mean: 374.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3669
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8002411895328099
          entropy_coeff: 0.009999999999999998
          kl: 0.011412918971385485
          policy_loss: -0.11270368869105975
          total_loss: -0.11744256768789556
          vf_explained_var: -0.4770371615886688
          vf_loss: 8.61462316404666e-05
    num_agent_steps_sampled: 1319000
    num_agent_steps_trained: 1319000
    num_steps_sampled: 1319000
    num_steps_trained: 1319000
  iterations_since_restore: 1319
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1319,32665,1319000,0,0,0,374.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1320000
  custom_metrics: {}
  date: 2021-10-09_07-29-16
  done: false
  episode_len_mean: 374.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3672
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5540975915061104
          entropy_coeff: 0.009999999999999998
          kl: 0.006538899013752244
          policy_loss: -0.030478121567931442
          total_loss: -0.038378223445680405
          vf_explained_var: -0.29366645216941833
          vf_loss: 9.104000594864678e-05
    num_agent_steps_sampled: 1320000
    num_agent_steps_trained: 1320000
    num_steps_sampled: 1320000
    num_steps_trained: 1320000
  iterations_since_restore: 1320

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1320,32685.6,1320000,0,0,0,374.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1321000
  custom_metrics: {}
  date: 2021-10-09_07-29-40
  done: false
  episode_len_mean: 373.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3674
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7885107715924582
          entropy_coeff: 0.009999999999999998
          kl: 0.012059156812641265
          policy_loss: -0.1227455674774117
          total_loss: -0.1266684359146489
          vf_explained_var: -0.30657801032066345
          vf_loss: 3.8698255735249e-05
    num_agent_steps_sampled: 1321000
    num_agent_steps_trained: 1321000
    num_steps_sampled: 1321000
    num_steps_trained: 1321000
  iterations_since_restore: 1321
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1321,32709.3,1321000,0,0,0,373.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1322000
  custom_metrics: {}
  date: 2021-10-09_07-30-05
  done: false
  episode_len_mean: 372.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3677
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5535516606436834
          entropy_coeff: 0.009999999999999998
          kl: 0.012345876539909402
          policy_loss: -0.02318279527955585
          total_loss: -0.024409082449144786
          vf_explained_var: -0.21654170751571655
          vf_loss: 5.464356233157256e-05
    num_agent_steps_sampled: 1322000
    num_agent_steps_trained: 1322000
    num_steps_sampled: 1322000
    num_steps_trained: 1322000
  iterations_since_restore: 1322

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1322,32735,1322000,0,0,0,372.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1323000
  custom_metrics: {}
  date: 2021-10-09_07-30-28
  done: false
  episode_len_mean: 371.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3680
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7999947243266636
          entropy_coeff: 0.009999999999999998
          kl: 0.010258135398400212
          policy_loss: -0.1005080638349884
          total_loss: -0.1066250286789404
          vf_explained_var: -0.5821883082389832
          vf_loss: 3.8910198984214934e-05
    num_agent_steps_sampled: 1323000
    num_agent_steps_trained: 1323000
    num_steps_sampled: 1323000
    num_steps_trained: 1323000
  iterations_since_restore: 1323
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1323,32757.7,1323000,0,0,0,371.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1324000
  custom_metrics: {}
  date: 2021-10-09_07-30-50
  done: false
  episode_len_mean: 372.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3683
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7253515773349337
          entropy_coeff: 0.009999999999999998
          kl: 0.009938308956063792
          policy_loss: -0.07985676305575504
          total_loss: -0.0856116901876198
          vf_explained_var: 0.056984562426805496
          vf_loss: 2.378999693064543e-05
    num_agent_steps_sampled: 1324000
    num_agent_steps_trained: 1324000
    num_steps_sampled: 1324000
    num_steps_trained: 1324000
  iterations_since_restore: 1324
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1324,32779.1,1324000,0,0,0,372.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1325000
  custom_metrics: {}
  date: 2021-10-09_07-31-14
  done: false
  episode_len_mean: 370.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3686
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6804621431562636
          entropy_coeff: 0.009999999999999998
          kl: 0.006946135393233159
          policy_loss: -0.0008815078064799309
          total_loss: -0.009646391868591309
          vf_explained_var: -0.5418189764022827
          vf_loss: 1.9710448587930944e-05
    num_agent_steps_sampled: 1325000
    num_agent_steps_trained: 1325000
    num_steps_sampled: 1325000
    num_steps_trained: 1325000
  iterations_since_restore: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1325,32803.7,1325000,0,0,0,370.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1326000
  custom_metrics: {}
  date: 2021-10-09_07-31-39
  done: false
  episode_len_mean: 371.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3688
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5766722281773886
          entropy_coeff: 0.009999999999999998
          kl: 0.01143359794042325
          policy_loss: -0.05692705764538712
          total_loss: -0.05944881590290202
          vf_explained_var: -0.5861759781837463
          vf_loss: 4.3699008609918466e-05
    num_agent_steps_sampled: 1326000
    num_agent_steps_trained: 1326000
    num_steps_sampled: 1326000
    num_steps_trained: 1326000
  iterations_since_restore: 1326
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1326,32828.6,1326000,0,0,0,371.07




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1327000
  custom_metrics: {}
  date: 2021-10-09_07-32-22
  done: false
  episode_len_mean: 368.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 3692
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.390015066994561
          entropy_coeff: 0.009999999999999998
          kl: 0.014563865806349094
          policy_loss: -0.04732961211767461
          total_loss: -0.04316639755335119
          vf_explained_var: -0.16379107534885406
          vf_loss: 0.0012478847971376834
    num_agent_steps_sampled: 1327000
    num_agent_steps_trained: 1327000
    num_steps_sampled: 1327000
    num_steps_trained: 1327000
  iterations_since_restore: 1327
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1327,32871.4,1327000,0,0,0,368


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1328000
  custom_metrics: {}
  date: 2021-10-09_07-32-46
  done: false
  episode_len_mean: 366.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3695
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8276239527596367
          entropy_coeff: 0.009999999999999998
          kl: 0.008768959235018216
          policy_loss: -0.03351552339477672
          total_loss: -0.04161409218278196
          vf_explained_var: -0.5904207229614258
          vf_loss: 5.300482844177168e-05
    num_agent_steps_sampled: 1328000
    num_agent_steps_trained: 1328000
    num_steps_sampled: 1328000
    num_steps_trained: 1328000
  iterations_since_restore: 1328
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1328,32895.3,1328000,0,0,0,366.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1329000
  custom_metrics: {}
  date: 2021-10-09_07-33-10
  done: false
  episode_len_mean: 365.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3697
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8232651511828104
          entropy_coeff: 0.009999999999999998
          kl: 0.008659351562491166
          policy_loss: -0.07368214796814654
          total_loss: -0.08188301212050848
          vf_explained_var: -0.7544248104095459
          vf_loss: 3.367288795238387e-05
    num_agent_steps_sampled: 1329000
    num_agent_steps_trained: 1329000
    num_steps_sampled: 1329000
    num_steps_trained: 1329000
  iterations_since_restore: 1329
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1329,32919.1,1329000,0,0,0,365.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1330000
  custom_metrics: {}
  date: 2021-10-09_07-33-34
  done: false
  episode_len_mean: 364.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3700
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5927644451459249
          entropy_coeff: 0.009999999999999998
          kl: 0.009929106987280651
          policy_loss: -0.07235783566203383
          total_loss: -0.0767987113032076
          vf_explained_var: -0.5366379022598267
          vf_loss: 2.258913927006587e-05
    num_agent_steps_sampled: 1330000
    num_agent_steps_trained: 1330000
    num_steps_sampled: 1330000
    num_steps_trained: 1330000
  iterations_since_restore: 1330
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1330,32943.4,1330000,0,0,0,364.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1331000
  custom_metrics: {}
  date: 2021-10-09_07-34-01
  done: false
  episode_len_mean: 363.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3703
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.490303013059828
          entropy_coeff: 0.009999999999999998
          kl: 0.011839839380763332
          policy_loss: -0.03732493552896712
          total_loss: -0.038538176731930836
          vf_explained_var: -0.02776498533785343
          vf_loss: 1.9474806291934024e-05
    num_agent_steps_sampled: 1331000
    num_agent_steps_trained: 1331000
    num_steps_sampled: 1331000
    num_steps_trained: 1331000
  iterations_since_restore: 1331

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1331,32970.3,1331000,0,0,0,363.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1332000
  custom_metrics: {}
  date: 2021-10-09_07-34-26
  done: false
  episode_len_mean: 361.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 3707
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.4173364056481255
          entropy_coeff: 0.009999999999999998
          kl: 0.008051647804661617
          policy_loss: -0.03168413158920076
          total_loss: -0.03652893958820237
          vf_explained_var: 0.08195449411869049
          vf_loss: 3.209933150477203e-05
    num_agent_steps_sampled: 1332000
    num_agent_steps_trained: 1332000
    num_steps_sampled: 1332000
    num_steps_trained: 1332000
  iterations_since_restore: 1332
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1332,32995.3,1332000,0,0,0,361.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1333000
  custom_metrics: {}
  date: 2021-10-09_07-34-52
  done: false
  episode_len_mean: 359.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3710
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6823965986569722
          entropy_coeff: 0.009999999999999998
          kl: 0.01135264574914052
          policy_loss: -0.04872338653852542
          total_loss: -0.05241301678535011
          vf_explained_var: -0.2308911681175232
          vf_loss: 2.6539144235155416e-05
    num_agent_steps_sampled: 1333000
    num_agent_steps_trained: 1333000
    num_steps_sampled: 1333000
    num_steps_trained: 1333000
  iterations_since_restore: 1333
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1333,33020.8,1333000,0,0,0,359.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1334000
  custom_metrics: {}
  date: 2021-10-09_07-35-16
  done: false
  episode_len_mean: 357.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3713
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6110660738415188
          entropy_coeff: 0.009999999999999998
          kl: 0.009978588866744471
          policy_loss: -0.06759734445561966
          total_loss: -0.07215594268507428
          vf_explained_var: -0.5700053572654724
          vf_loss: 3.075254690985376e-05
    num_agent_steps_sampled: 1334000
    num_agent_steps_trained: 1334000
    num_steps_sampled: 1334000
    num_steps_trained: 1334000
  iterations_since_restore: 1334
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1334,33045.6,1334000,0,0,0,357.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1335000
  custom_metrics: {}
  date: 2021-10-09_07-35-42
  done: false
  episode_len_mean: 356.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3716
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.622738050089942
          entropy_coeff: 0.009999999999999998
          kl: 0.009128170531026904
          policy_loss: 0.09317021792133649
          total_loss: 0.08757406630449825
          vf_explained_var: 0.14863964915275574
          vf_loss: 9.181399477205963e-05
    num_agent_steps_sampled: 1335000
    num_agent_steps_trained: 1335000
    num_steps_sampled: 1335000
    num_steps_trained: 1335000
  iterations_since_restore: 1335
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1335,33071.8,1335000,0,0,0,356.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1336000
  custom_metrics: {}
  date: 2021-10-09_07-36-08
  done: false
  episode_len_mean: 355.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3719
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6832234130965338
          entropy_coeff: 0.009999999999999998
          kl: 0.007374782318814215
          policy_loss: -0.07577515484558212
          total_loss: -0.08406824461287922
          vf_explained_var: 0.275957316160202
          vf_loss: 2.4201497247607524e-05
    num_agent_steps_sampled: 1336000
    num_agent_steps_trained: 1336000
    num_steps_sampled: 1336000
    num_steps_trained: 1336000
  iterations_since_restore: 1336
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1336,33096.8,1336000,0,0,0,355.6




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1337000
  custom_metrics: {}
  date: 2021-10-09_07-36-51
  done: false
  episode_len_mean: 352.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3722
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5855945256021289
          entropy_coeff: 0.009999999999999998
          kl: 0.010148481697026195
          policy_loss: -0.07520161536004809
          total_loss: -0.0793052814900875
          vf_explained_var: 0.12310728430747986
          vf_loss: 3.480772340683163e-05
    num_agent_steps_sampled: 1337000
    num_agent_steps_trained: 1337000
    num_steps_sampled: 1337000
    num_steps_trained: 1337000
  iterations_since_restore: 1337
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1337,33140.3,1337000,0,0,0,352.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1338000
  custom_metrics: {}
  date: 2021-10-09_07-37-15
  done: false
  episode_len_mean: 351.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3725
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6914275964101155
          entropy_coeff: 0.009999999999999998
          kl: 0.008615484338618373
          policy_loss: -0.09198127831849787
          total_loss: -0.09890894889831543
          vf_explained_var: 0.03403490409255028
          vf_loss: 3.914186368597762e-05
    num_agent_steps_sampled: 1338000
    num_agent_steps_trained: 1338000
    num_steps_sampled: 1338000
    num_steps_trained: 1338000
  iterations_since_restore: 1338
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1338,33164.6,1338000,0,0,0,351.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1339000
  custom_metrics: {}
  date: 2021-10-09_07-37-42
  done: false
  episode_len_mean: 349.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3728
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7708319425582886
          entropy_coeff: 0.009999999999999998
          kl: 0.008637978787134761
          policy_loss: -0.1515024911198351
          total_loss: -0.1591808896097872
          vf_explained_var: -0.36833399534225464
          vf_loss: 5.6487923838075304e-05
    num_agent_steps_sampled: 1339000
    num_agent_steps_trained: 1339000
    num_steps_sampled: 1339000
    num_steps_trained: 1339000
  iterations_since_restore: 1339
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1339,33191.2,1339000,0,0,0,349.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1340000
  custom_metrics: {}
  date: 2021-10-09_07-38-05
  done: false
  episode_len_mean: 349.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3731
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.656593550576104
          entropy_coeff: 0.009999999999999998
          kl: 0.01046158998034663
          policy_loss: -0.09385330018897851
          total_loss: -0.09829062985049354
          vf_explained_var: -0.5080026984214783
          vf_loss: 4.962266555392287e-05
    num_agent_steps_sampled: 1340000
    num_agent_steps_trained: 1340000
    num_steps_sampled: 1340000
    num_steps_trained: 1340000
  iterations_since_restore: 1340
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1340,33214.6,1340000,0,0,0,349.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1341000
  custom_metrics: {}
  date: 2021-10-09_07-38-30
  done: false
  episode_len_mean: 348.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3734
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.718699445989397
          entropy_coeff: 0.009999999999999998
          kl: 0.00937451785094103
          policy_loss: -0.018263763603236942
          total_loss: -0.02460644178920322
          vf_explained_var: -0.4456501007080078
          vf_loss: 2.046793014313961e-05
    num_agent_steps_sampled: 1341000
    num_agent_steps_trained: 1341000
    num_steps_sampled: 1341000
    num_steps_trained: 1341000
  iterations_since_restore: 1341
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1341,33239.6,1341000,0,0,0,348.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1342000
  custom_metrics: {}
  date: 2021-10-09_07-38-57
  done: false
  episode_len_mean: 347.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3737
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.412999481625027
          entropy_coeff: 0.009999999999999998
          kl: 0.010051260455844771
          policy_loss: -0.08845961613373624
          total_loss: -0.09097231469220585
          vf_explained_var: 0.037030335515737534
          vf_loss: 1.2079648644834783e-05
    num_agent_steps_sampled: 1342000
    num_agent_steps_trained: 1342000
    num_steps_sampled: 1342000
    num_steps_trained: 1342000
  iterations_since_restore: 1342


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1342,33266.3,1342000,0,0,0,347.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1343000
  custom_metrics: {}
  date: 2021-10-09_07-39-21
  done: false
  episode_len_mean: 347.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3740
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5459157678816053
          entropy_coeff: 0.009999999999999998
          kl: 0.01046018345945904
          policy_loss: -0.06415790677484538
          total_loss: -0.06751600098278787
          vf_explained_var: -0.2572997808456421
          vf_loss: 2.3705785073010095e-05
    num_agent_steps_sampled: 1343000
    num_agent_steps_trained: 1343000
    num_steps_sampled: 1343000
    num_steps_trained: 1343000
  iterations_since_restore: 1343
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1343,33290.4,1343000,0,0,0,347.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1344000
  custom_metrics: {}
  date: 2021-10-09_07-39-47
  done: false
  episode_len_mean: 345.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3743
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7688270224465263
          entropy_coeff: 0.009999999999999998
          kl: 0.012897074687811738
          policy_loss: -0.058511630569895105
          total_loss: -0.06128937142590682
          vf_explained_var: -0.19863754510879517
          vf_loss: 1.953027620200171e-05
    num_agent_steps_sampled: 1344000
    num_agent_steps_trained: 1344000
    num_steps_sampled: 1344000
    num_steps_trained: 1344000
  iterations_since_restore: 1344

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1344,33316,1344000,0,0,0,345.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1345000
  custom_metrics: {}
  date: 2021-10-09_07-40-10
  done: false
  episode_len_mean: 344.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3746
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.9876494553354052
          entropy_coeff: 0.009999999999999998
          kl: 0.010241159252397717
          policy_loss: -0.15672249275570113
          total_loss: -0.16475783831750354
          vf_explained_var: -0.46589481830596924
          vf_loss: 1.6677321643025304e-05
    num_agent_steps_sampled: 1345000
    num_agent_steps_trained: 1345000
    num_steps_sampled: 1345000
    num_steps_trained: 1345000
  iterations_since_restore: 1345

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1345,33339.1,1345000,0,0,0,344.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1346000
  custom_metrics: {}
  date: 2021-10-09_07-40-32
  done: false
  episode_len_mean: 344.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3748
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8067217667897542
          entropy_coeff: 0.009999999999999998
          kl: 0.011285544210350591
          policy_loss: -0.08039255113237434
          total_loss: -0.08541350753770935
          vf_explained_var: -0.5752397179603577
          vf_loss: 1.5936966413695093e-05
    num_agent_steps_sampled: 1346000
    num_agent_steps_trained: 1346000
    num_steps_sampled: 1346000
    num_steps_trained: 1346000
  iterations_since_restore: 1346


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1346,33361.5,1346000,0,0,0,344.56




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1347000
  custom_metrics: {}
  date: 2021-10-09_07-41-12
  done: false
  episode_len_mean: 345.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3751
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8198769092559814
          entropy_coeff: 0.009999999999999998
          kl: 0.01148251781196669
          policy_loss: -0.055095852952864434
          total_loss: -0.05998885465992822
          vf_explained_var: -0.21453936398029327
          vf_loss: 4.8017995919104175e-05
    num_agent_steps_sampled: 1347000
    num_agent_steps_trained: 1347000
    num_steps_sampled: 1347000
    num_steps_trained: 1347000
  iterations_since_restore: 1347

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1347,33401.3,1347000,0,0,0,345.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1348000
  custom_metrics: {}
  date: 2021-10-09_07-41-38
  done: false
  episode_len_mean: 344.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3754
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.823027859793769
          entropy_coeff: 0.009999999999999998
          kl: 0.010351635191354782
          policy_loss: -0.15967251948184438
          total_loss: -0.1658832598477602
          vf_explained_var: -0.04550592228770256
          vf_loss: 6.751011135040447e-05
    num_agent_steps_sampled: 1348000
    num_agent_steps_trained: 1348000
    num_steps_sampled: 1348000
    num_steps_trained: 1348000
  iterations_since_restore: 1348
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1348,33427,1348000,0,0,0,344.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1349000
  custom_metrics: {}
  date: 2021-10-09_07-42-01
  done: false
  episode_len_mean: 343.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3757
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7937319782045154
          entropy_coeff: 0.009999999999999998
          kl: 0.007784389519552428
          policy_loss: -0.16072250894374318
          total_loss: -0.16964060540000597
          vf_explained_var: -0.6860203146934509
          vf_loss: 3.1344821056538625e-05
    num_agent_steps_sampled: 1349000
    num_agent_steps_trained: 1349000
    num_steps_sampled: 1349000
    num_steps_trained: 1349000
  iterations_since_restore: 1349


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1349,33450.1,1349000,0,0,0,343.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1350000
  custom_metrics: {}
  date: 2021-10-09_07-42-23
  done: false
  episode_len_mean: 345.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3760
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.362789769967397
          entropy_coeff: 0.009999999999999998
          kl: 0.006180121211780292
          policy_loss: 0.016377341022921932
          total_loss: 0.009901410734487904
          vf_explained_var: -0.2482297271490097
          vf_loss: 1.6381441289720695e-05
    num_agent_steps_sampled: 1350000
    num_agent_steps_trained: 1350000
    num_steps_sampled: 1350000
    num_steps_trained: 1350000
  iterations_since_restore: 1350
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1350,33471.8,1350000,0,0,0,345.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1351000
  custom_metrics: {}
  date: 2021-10-09_07-42-46
  done: false
  episode_len_mean: 344.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3762
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8468997875849407
          entropy_coeff: 0.009999999999999998
          kl: 0.010714230656054034
          policy_loss: -0.15971523403293555
          total_loss: -0.1658047874354654
          vf_explained_var: -0.012469743378460407
          vf_loss: 8.76186499378188e-06
    num_agent_steps_sampled: 1351000
    num_agent_steps_trained: 1351000
    num_steps_sampled: 1351000
    num_steps_trained: 1351000
  iterations_since_restore: 1351
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1351,33495,1351000,0,0,0,344.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1352000
  custom_metrics: {}
  date: 2021-10-09_07-43-09
  done: false
  episode_len_mean: 345.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3765
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8296476562817892
          entropy_coeff: 0.009999999999999998
          kl: 0.00815300158741701
          policy_loss: -0.06115113499884804
          total_loss: -0.07002012618920869
          vf_explained_var: -0.8881466388702393
          vf_loss: 1.400706510139571e-05
    num_agent_steps_sampled: 1352000
    num_agent_steps_trained: 1352000
    num_steps_sampled: 1352000
    num_steps_trained: 1352000
  iterations_since_restore: 1352
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1352,33517.8,1352000,0,0,0,345.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1353000
  custom_metrics: {}
  date: 2021-10-09_07-43-34
  done: false
  episode_len_mean: 344.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3768
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5114671236938901
          entropy_coeff: 0.009999999999999998
          kl: 0.00862064066453474
          policy_loss: -0.07978830277505848
          total_loss: -0.0849373794057303
          vf_explained_var: -0.5642889738082886
          vf_loss: 1.2174925152268973e-05
    num_agent_steps_sampled: 1353000
    num_agent_steps_trained: 1353000
    num_steps_sampled: 1353000
    num_steps_trained: 1353000
  iterations_since_restore: 1353
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1353,33543.2,1353000,0,0,0,344.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1354000
  custom_metrics: {}
  date: 2021-10-09_07-43-58
  done: false
  episode_len_mean: 342.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3771
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6640818476676942
          entropy_coeff: 0.009999999999999998
          kl: 0.010835434037443529
          policy_loss: -0.04303545533782906
          total_loss: -0.04714492323497931
          vf_explained_var: -0.3482450246810913
          vf_loss: 2.072692523142905e-05
    num_agent_steps_sampled: 1354000
    num_agent_steps_trained: 1354000
    num_steps_sampled: 1354000
    num_steps_trained: 1354000
  iterations_since_restore: 1354
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1354,33566.7,1354000,0,0,0,342.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1355000
  custom_metrics: {}
  date: 2021-10-09_07-44-19
  done: false
  episode_len_mean: 342.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3774
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6356670591566298
          entropy_coeff: 0.009999999999999998
          kl: 0.010870355814319587
          policy_loss: -0.04427540558907721
          total_loss: -0.04801840815279219
          vf_explained_var: -0.4864615499973297
          vf_loss: 6.272258883149738e-05
    num_agent_steps_sampled: 1355000
    num_agent_steps_trained: 1355000
    num_steps_sampled: 1355000
    num_steps_trained: 1355000
  iterations_since_restore: 1355
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1355,33588,1355000,0,0,0,342.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1356000
  custom_metrics: {}
  date: 2021-10-09_07-44-41
  done: false
  episode_len_mean: 344.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3776
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8484343065155877
          entropy_coeff: 0.009999999999999998
          kl: 0.012563526738886843
          policy_loss: -0.09677269613991181
          total_loss: -0.10072899444235696
          vf_explained_var: -0.454355925321579
          vf_loss: 2.2159409208042132e-05
    num_agent_steps_sampled: 1356000
    num_agent_steps_trained: 1356000
    num_steps_sampled: 1356000
    num_steps_trained: 1356000
  iterations_since_restore: 1356
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1356,33610.3,1356000,0,0,0,344.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1357000
  custom_metrics: {}
  date: 2021-10-09_07-45-01
  done: false
  episode_len_mean: 345.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3779
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.5689583831363254
          entropy_coeff: 0.009999999999999998
          kl: 0.007172504229752658
          policy_loss: -0.08428018941647476
          total_loss: -0.09167551493479145
          vf_explained_var: -0.0687122642993927
          vf_loss: 1.2863320454117557e-05
    num_agent_steps_sampled: 1357000
    num_agent_steps_trained: 1357000
    num_steps_sampled: 1357000
    num_steps_trained: 1357000
  iterations_since_restore: 1357


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1357,33630.3,1357000,0,0,0,345.75




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1358000
  custom_metrics: {}
  date: 2021-10-09_07-45-43
  done: false
  episode_len_mean: 344.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3782
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.8458443734380934
          entropy_coeff: 0.009999999999999998
          kl: 0.009660971274107134
          policy_loss: -0.0951326390521394
          total_loss: -0.10241568421738015
          vf_explained_var: -0.7733373641967773
          vf_loss: 2.0812427985422093e-05
    num_agent_steps_sampled: 1358000
    num_agent_steps_trained: 1358000
    num_steps_sampled: 1358000
    num_steps_trained: 1358000
  iterations_since_restore: 1358
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1358,33672.3,1358000,0,0,0,344.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1359000
  custom_metrics: {}
  date: 2021-10-09_07-46-09
  done: false
  episode_len_mean: 343.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3785
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.644558572769165
          entropy_coeff: 0.009999999999999998
          kl: 0.010406480777995341
          policy_loss: -0.08333049246834384
          total_loss: -0.08766914177685976
          vf_explained_var: -0.836894154548645
          vf_loss: 9.158269847123139e-05
    num_agent_steps_sampled: 1359000
    num_agent_steps_trained: 1359000
    num_steps_sampled: 1359000
    num_steps_trained: 1359000
  iterations_since_restore: 1359
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1359,33697.3,1359000,0,0,0,343.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1360000
  custom_metrics: {}
  date: 2021-10-09_07-46-35
  done: false
  episode_len_mean: 342.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3788
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.6443218734529284
          entropy_coeff: 0.009999999999999998
          kl: 0.006759650011939774
          policy_loss: 0.06396597870108155
          total_loss: 0.055340735821260346
          vf_explained_var: 0.14838463068008423
          vf_loss: 1.3262617716868085e-05
    num_agent_steps_sampled: 1360000
    num_agent_steps_trained: 1360000
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
  iterations_since_restore: 1360
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1360,33724,1360000,0,0,0,342.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1361000
  custom_metrics: {}
  date: 2021-10-09_07-47-01
  done: false
  episode_len_mean: 343.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3791
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.7535705473687915
          entropy_coeff: 0.009999999999999998
          kl: 0.009461932454125745
          policy_loss: 0.008265902412434419
          total_loss: 0.001668717877732383
          vf_explained_var: -0.4801081717014313
          vf_loss: 1.3745812030821172e-05
    num_agent_steps_sampled: 1361000
    num_agent_steps_trained: 1361000
    num_steps_sampled: 1361000
    num_steps_trained: 1361000
  iterations_since_restore: 1361


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1361,33749.6,1361000,0,0,0,343.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1362000
  custom_metrics: {}
  date: 2021-10-09_07-47-25
  done: false
  episode_len_mean: 343.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3794
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1546030312776565
          cur_lr: 5.000000000000001e-05
          entropy: 1.9049434489674038
          entropy_coeff: 0.009999999999999998
          kl: 0.0029459944226862453
          policy_loss: 0.058792741907139616
          total_loss: 0.04315557668192519
          vf_explained_var: -0.5362095832824707
          vf_loss: 1.081526759207918e-05
    num_agent_steps_sampled: 1362000
    num_agent_steps_trained: 1362000
    num_steps_sampled: 1362000
    num_steps_trained: 1362000
  iterations_since_restore: 1362
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1362,33773.7,1362000,0,0,0,343.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1363000
  custom_metrics: {}
  date: 2021-10-09_07-47-49
  done: false
  episode_len_mean: 342.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 3796
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.829317327340444
          entropy_coeff: 0.009999999999999998
          kl: 0.012491353388702858
          policy_loss: -0.0652233646147781
          total_loss: -0.0762992497947481
          vf_explained_var: 0.04400205984711647
          vf_loss: 6.010353766012284e-06
    num_agent_steps_sampled: 1363000
    num_agent_steps_trained: 1363000
    num_steps_sampled: 1363000
    num_steps_trained: 1363000
  iterations_since_restore: 1363
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1363,33797.8,1363000,0,0,0,342.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1364000
  custom_metrics: {}
  date: 2021-10-09_07-48-12
  done: false
  episode_len_mean: 343.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3799
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.6344842036565146
          entropy_coeff: 0.009999999999999998
          kl: 0.012615843776898178
          policy_loss: -0.05019990847342544
          total_loss: -0.05923984758555889
          vf_explained_var: -0.31443437933921814
          vf_loss: 2.175803907044206e-05
    num_agent_steps_sampled: 1364000
    num_agent_steps_trained: 1364000
    num_steps_sampled: 1364000
    num_steps_trained: 1364000
  iterations_since_restore: 1364


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1364,33820.9,1364000,0,0,0,343.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1365000
  custom_metrics: {}
  date: 2021-10-09_07-48-37
  done: false
  episode_len_mean: 343.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3802
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.6374312467045253
          entropy_coeff: 0.009999999999999998
          kl: 0.013170328779077107
          policy_loss: -0.045969029660854074
          total_loss: -0.0547089000335998
          vf_explained_var: -0.7339094281196594
          vf_loss: 3.1189083519065106e-05
    num_agent_steps_sampled: 1365000
    num_agent_steps_trained: 1365000
    num_steps_sampled: 1365000
    num_steps_trained: 1365000
  iterations_since_restore: 1365


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1365,33845.4,1365000,0,0,0,343.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1366000
  custom_metrics: {}
  date: 2021-10-09_07-49-03
  done: false
  episode_len_mean: 344.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3805
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.6881720238261753
          entropy_coeff: 0.009999999999999998
          kl: 0.017043251064494284
          policy_loss: -0.043764637741777634
          total_loss: -0.050630380130476424
          vf_explained_var: -0.749092161655426
          vf_loss: 0.00017688454252796267
    num_agent_steps_sampled: 1366000
    num_agent_steps_trained: 1366000
    num_steps_sampled: 1366000
    num_steps_trained: 1366000
  iterations_since_restore: 1366

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1366,33871.3,1366000,0,0,0,344.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1367000
  custom_metrics: {}
  date: 2021-10-09_07-49-28
  done: false
  episode_len_mean: 344.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3808
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.8708748035960727
          entropy_coeff: 0.009999999999999998
          kl: 0.01570184155404285
          policy_loss: -0.11345173244675001
          total_loss: -0.1230572153503696
          vf_explained_var: -0.345508337020874
          vf_loss: 3.857108963529754e-05
    num_agent_steps_sampled: 1367000
    num_agent_steps_trained: 1367000
    num_steps_sampled: 1367000
    num_steps_trained: 1367000
  iterations_since_restore: 1367
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1367,33896.7,1367000,0,0,0,344.33




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1368000
  custom_metrics: {}
  date: 2021-10-09_07-50-10
  done: false
  episode_len_mean: 345.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3811
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.709016462167104
          entropy_coeff: 0.009999999999999998
          kl: 0.01597086215220429
          policy_loss: -0.1714041858083672
          total_loss: -0.17926427216993437
          vf_explained_var: -0.15236908197402954
          vf_loss: 1.0075047427764933e-05
    num_agent_steps_sampled: 1368000
    num_agent_steps_trained: 1368000
    num_steps_sampled: 1368000
    num_steps_trained: 1368000
  iterations_since_restore: 1368
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1368,33938.9,1368000,0,0,0,345.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1369000
  custom_metrics: {}
  date: 2021-10-09_07-50-36
  done: false
  episode_len_mean: 344.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3814
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.765298725499047
          entropy_coeff: 0.009999999999999998
          kl: 0.014850796195568073
          policy_loss: -0.11300683257480462
          total_loss: -0.12207360429068406
          vf_explained_var: -0.06166641041636467
          vf_loss: 1.2827561648818017e-05
    num_agent_steps_sampled: 1369000
    num_agent_steps_trained: 1369000
    num_steps_sampled: 1369000
    num_steps_trained: 1369000
  iterations_since_restore: 1369


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1369,33964.9,1369000,0,0,0,344.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1370000
  custom_metrics: {}
  date: 2021-10-09_07-50-59
  done: false
  episode_len_mean: 345.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3817
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.7798255840937296
          entropy_coeff: 0.009999999999999998
          kl: 0.013976873761341263
          policy_loss: -0.05017581909067101
          total_loss: -0.05988654906137122
          vf_explained_var: -0.43980249762535095
          vf_loss: 1.8655084992739528e-05
    num_agent_steps_sampled: 1370000
    num_agent_steps_trained: 1370000
    num_steps_sampled: 1370000
    num_steps_trained: 1370000
  iterations_since_restore: 1370

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1370,33987.5,1370000,0,0,0,345.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1371000
  custom_metrics: {}
  date: 2021-10-09_07-51-24
  done: false
  episode_len_mean: 346.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3820
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.6681982768906487
          entropy_coeff: 0.009999999999999998
          kl: 0.015369028971123397
          policy_loss: -0.06209543715748522
          total_loss: -0.06988192122015688
          vf_explained_var: -0.358198344707489
          vf_loss: 2.293649030333553e-05
    num_agent_steps_sampled: 1371000
    num_agent_steps_trained: 1371000
    num_steps_sampled: 1371000
    num_steps_trained: 1371000
  iterations_since_restore: 1371
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1371,34012.4,1371000,0,0,0,346.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1372000
  custom_metrics: {}
  date: 2021-10-09_07-51-49
  done: false
  episode_len_mean: 345.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3823
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.7160869677861532
          entropy_coeff: 0.009999999999999998
          kl: 0.012729360509227227
          policy_loss: -0.0796742659372588
          total_loss: -0.08946842658850881
          vf_explained_var: -0.3819342255592346
          vf_loss: 1.8028650134106605e-05
    num_agent_steps_sampled: 1372000
    num_agent_steps_trained: 1372000
    num_steps_sampled: 1372000
    num_steps_trained: 1372000
  iterations_since_restore: 1372
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1372,34037.3,1372000,0,0,0,345.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1373000
  custom_metrics: {}
  date: 2021-10-09_07-52-15
  done: false
  episode_len_mean: 345.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 3826
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.6574378861321344
          entropy_coeff: 0.009999999999999998
          kl: 0.02375636860489687
          policy_loss: -0.0856538689798779
          total_loss: -0.08849455739061038
          vf_explained_var: -0.11467823386192322
          vf_loss: 1.910341235695038e-05
    num_agent_steps_sampled: 1373000
    num_agent_steps_trained: 1373000
    num_steps_sampled: 1373000
    num_steps_trained: 1373000
  iterations_since_restore: 1373
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1373,34063.6,1373000,0,0,0,345.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1374000
  custom_metrics: {}
  date: 2021-10-09_07-52-39
  done: false
  episode_len_mean: 346.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3829
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.6426972336239285
          entropy_coeff: 0.009999999999999998
          kl: 0.016236066753302535
          policy_loss: -0.05747832390997145
          total_loss: 0.04943108426200019
          vf_explained_var: -0.4502373933792114
          vf_loss: 0.10927671651912128
    num_agent_steps_sampled: 1374000
    num_agent_steps_trained: 1374000
    num_steps_sampled: 1374000
    num_steps_trained: 1374000
  iterations_since_restore: 1374
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1374,34087.6,1374000,-0.13,0,-13,346.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1375000
  custom_metrics: {}
  date: 2021-10-09_07-53-05
  done: false
  episode_len_mean: 345.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3832
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.4474366770850287
          entropy_coeff: 0.009999999999999998
          kl: 0.02755759595873231
          policy_loss: -0.0848848848707146
          total_loss: -0.06523750805192524
          vf_explained_var: -0.0515429712831974
          vf_loss: 0.010258183718865945
    num_agent_steps_sampled: 1375000
    num_agent_steps_trained: 1375000
    num_steps_sampled: 1375000
    num_steps_trained: 1375000
  iterations_since_restore: 1375
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1375,34113.7,1375000,-0.13,0,-13,345.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1376000
  custom_metrics: {}
  date: 2021-10-09_07-53-29
  done: false
  episode_len_mean: 346.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3835
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.538378718164232
          entropy_coeff: 0.009999999999999998
          kl: 0.00796100990432341
          policy_loss: -0.012118053353495068
          total_loss: 0.12918834330307113
          vf_explained_var: 0.04507764056324959
          vf_loss: 0.14634939899875057
    num_agent_steps_sampled: 1376000
    num_agent_steps_trained: 1376000
    num_steps_sampled: 1376000
    num_steps_trained: 1376000
  iterations_since_restore: 1376
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1376,34137.4,1376000,-0.25,0,-13,346.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1377000
  custom_metrics: {}
  date: 2021-10-09_07-53-54
  done: false
  episode_len_mean: 346.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.44
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3838
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4402519080373977
          entropy_coeff: 0.009999999999999998
          kl: 0.008055139092354047
          policy_loss: -0.044125819164845675
          total_loss: 0.21660740574200948
          vf_explained_var: -0.1655941903591156
          vf_loss: 0.26467269112666447
    num_agent_steps_sampled: 1377000
    num_agent_steps_trained: 1377000
    num_steps_sampled: 1377000
    num_steps_trained: 1377000
  iterations_since_restore: 1377

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1377,34162.9,1377000,-0.44,0,-13,346.66




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1378000
  custom_metrics: {}
  date: 2021-10-09_07-54-36
  done: false
  episode_len_mean: 347.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.47
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3841
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.0953123072783153
          entropy_coeff: 0.009999999999999998
          kl: 0.008809901455443183
          policy_loss: -0.05665085208084848
          total_loss: 0.033084761682483886
          vf_explained_var: 0.6885430216789246
          vf_loss: 0.08924530343049103
    num_agent_steps_sampled: 1378000
    num_agent_steps_trained: 1378000
    num_steps_sampled: 1378000
    num_steps_trained: 1378000
  iterations_since_restore: 1378


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1378,34204.3,1378000,-0.47,0,-13,347.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1379000
  custom_metrics: {}
  date: 2021-10-09_07-55-02
  done: false
  episode_len_mean: 346.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.47
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3844
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7141536288791233
          entropy_coeff: 0.009999999999999998
          kl: 0.018049153337503142
          policy_loss: -0.05142827820446756
          total_loss: -0.027490892426835167
          vf_explained_var: -0.05700123682618141
          vf_loss: 0.017634364046777286
    num_agent_steps_sampled: 1379000
    num_agent_steps_trained: 1379000
    num_steps_sampled: 1379000
    num_steps_trained: 1379000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1379,34230.4,1379000,-0.47,0,-13,346.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1380000
  custom_metrics: {}
  date: 2021-10-09_07-55-27
  done: false
  episode_len_mean: 346.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.47
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3847
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.758434936735365
          entropy_coeff: 0.009999999999999998
          kl: 0.011246438303010278
          policy_loss: -0.007384646559755007
          total_loss: -0.0003071904182434082
          vf_explained_var: -0.30848488211631775
          vf_loss: 0.010053490350643794
    num_agent_steps_sampled: 1380000
    num_agent_steps_trained: 1380000
    num_steps_sampled: 1380000
    num_steps_trained: 1380000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1380,34255.8,1380000,-0.47,0,-13,346.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1381000
  custom_metrics: {}
  date: 2021-10-09_07-55-54
  done: false
  episode_len_mean: 344.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.47
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3850
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7844865944650439
          entropy_coeff: 0.009999999999999998
          kl: 0.010350784775077561
          policy_loss: -0.078620429088672
          total_loss: -0.07757576368749142
          vf_explained_var: -0.15068039298057556
          vf_loss: 0.005444599951927861
    num_agent_steps_sampled: 1381000
    num_agent_steps_trained: 1381000
    num_steps_sampled: 1381000
    num_steps_trained: 1381000
  iterations_since_restore: 1381

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1381,34282.1,1381000,-0.47,0,-13,344.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1382000
  custom_metrics: {}
  date: 2021-10-09_07-56-20
  done: false
  episode_len_mean: 343.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.47
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3853
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6669581996070013
          entropy_coeff: 0.009999999999999998
          kl: 0.015275393475511675
          policy_loss: -0.07732183933258056
          total_loss: -0.06076548960473802
          vf_explained_var: -0.3588698208332062
          vf_loss: 0.013384291146778398
    num_agent_steps_sampled: 1382000
    num_agent_steps_trained: 1382000
    num_steps_sampled: 1382000
    num_steps_trained: 1382000
  iterations_since_restore: 138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1382,34308.9,1382000,-0.47,0,-13,343.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1383000
  custom_metrics: {}
  date: 2021-10-09_07-56-47
  done: false
  episode_len_mean: 342.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.47
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3856
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7923406256569756
          entropy_coeff: 0.009999999999999998
          kl: 0.008777853096145292
          policy_loss: -0.08686421915060943
          total_loss: -0.09084701538085938
          vf_explained_var: -0.4911791980266571
          vf_loss: 0.002538806788571593
    num_agent_steps_sampled: 1383000
    num_agent_steps_trained: 1383000
    num_steps_sampled: 1383000
    num_steps_trained: 1383000
  iterations_since_restore: 138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1383,34335.8,1383000,-0.47,0,-13,342.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1384000
  custom_metrics: {}
  date: 2021-10-09_07-57-13
  done: false
  episode_len_mean: 340.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 4
  episodes_total: 3860
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.378211506207784
          entropy_coeff: 0.009999999999999998
          kl: 0.007412222990562138
          policy_loss: 0.013128560615910424
          total_loss: 0.09024635404348373
          vf_explained_var: 0.12899024784564972
          vf_loss: 0.08127196413568324
    num_agent_steps_sampled: 1384000
    num_agent_steps_trained: 1384000
    num_steps_sampled: 1384000
    num_steps_trained: 1384000
  iterations_since_restore: 1384
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1384,34361,1384000,-0.52,0,-13,340.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1385000
  custom_metrics: {}
  date: 2021-10-09_07-57-38
  done: false
  episode_len_mean: 339.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3863
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5911988536516826
          entropy_coeff: 0.009999999999999998
          kl: 0.009426530463506645
          policy_loss: -0.1136718259503444
          total_loss: -0.10976870788468254
          vf_explained_var: -0.4387052357196808
          vf_loss: 0.007570716154037251
    num_agent_steps_sampled: 1385000
    num_agent_steps_trained: 1385000
    num_steps_sampled: 1385000
    num_steps_trained: 1385000
  iterations_since_restore: 1385


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1385,34386.5,1385000,-0.52,0,-13,339.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1386000
  custom_metrics: {}
  date: 2021-10-09_07-58-05
  done: false
  episode_len_mean: 337.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3866
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4916597180896336
          entropy_coeff: 0.009999999999999998
          kl: 0.009787884897447145
          policy_loss: -0.06290276389982966
          total_loss: -0.05023233340018325
          vf_explained_var: 0.15049462020397186
          vf_loss: 0.01487326773090495
    num_agent_steps_sampled: 1386000
    num_agent_steps_trained: 1386000
    num_steps_sampled: 1386000
    num_steps_trained: 1386000
  iterations_since_restore: 1386

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1386,34413.2,1386000,-0.52,0,-13,337.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1387000
  custom_metrics: {}
  date: 2021-10-09_07-58-31
  done: false
  episode_len_mean: 337.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3869
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5639640039867826
          entropy_coeff: 0.009999999999999998
          kl: 0.009506615314667973
          policy_loss: -0.1340935293585062
          total_loss: -0.12822381858196524
          vf_explained_var: -0.22951596975326538
          vf_loss: 0.009160932901108431
    num_agent_steps_sampled: 1387000
    num_agent_steps_trained: 1387000
    num_steps_sampled: 1387000
    num_steps_trained: 1387000
  iterations_since_restore: 138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1387,34439.6,1387000,-0.52,0,-13,337.18




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1388000
  custom_metrics: {}
  date: 2021-10-09_07-59-16
  done: false
  episode_len_mean: 335.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3872
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.774415953954061
          entropy_coeff: 0.009999999999999998
          kl: 0.009662454677482262
          policy_loss: -0.12648615547352368
          total_loss: -0.1207368633399407
          vf_explained_var: -0.35580673813819885
          vf_loss: 0.010942611460470491
    num_agent_steps_sampled: 1388000
    num_agent_steps_trained: 1388000
    num_steps_sampled: 1388000
    num_steps_trained: 1388000
  iterations_since_restore: 1388

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1388,34484.1,1388000,-0.52,0,-13,335.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1389000
  custom_metrics: {}
  date: 2021-10-09_07-59-40
  done: false
  episode_len_mean: 334.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3875
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.719926687081655
          entropy_coeff: 0.009999999999999998
          kl: 0.010193027872892241
          policy_loss: -0.11095737231274445
          total_loss: -0.11101508293714789
          vf_explained_var: -0.9181743860244751
          vf_loss: 0.00390154476562101
    num_agent_steps_sampled: 1389000
    num_agent_steps_trained: 1389000
    num_steps_sampled: 1389000
    num_steps_trained: 1389000
  iterations_since_restore: 1389


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1389,34508.7,1389000,-0.52,0,-13,334.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1390000
  custom_metrics: {}
  date: 2021-10-09_08-00-04
  done: false
  episode_len_mean: 333.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3878
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.81684164736006
          entropy_coeff: 0.009999999999999998
          kl: 0.012555583618675639
          policy_loss: -0.08069991833633847
          total_loss: -0.07769695777032111
          vf_explained_var: -0.5440357327461243
          vf_loss: 0.0048625752743747495
    num_agent_steps_sampled: 1390000
    num_agent_steps_trained: 1390000
    num_steps_sampled: 1390000
    num_steps_trained: 1390000
  iterations_since_restore: 1390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1390,34532.2,1390000,-0.52,0,-13,333.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1391000
  custom_metrics: {}
  date: 2021-10-09_08-00-30
  done: false
  episode_len_mean: 330.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3881
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.837206678920322
          entropy_coeff: 0.009999999999999998
          kl: 0.007958026396036723
          policy_loss: -0.07502184599224064
          total_loss: -0.07874852892839246
          vf_explained_var: -0.49972522258758545
          vf_loss: 0.0043084786661590135
    num_agent_steps_sampled: 1391000
    num_agent_steps_trained: 1391000
    num_steps_sampled: 1391000
    num_steps_trained: 1391000
  iterations_since_restore: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1391,34558.2,1391000,-0.52,0,-13,330.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1392000
  custom_metrics: {}
  date: 2021-10-09_08-00-55
  done: false
  episode_len_mean: 331.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3884
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6587135500378078
          entropy_coeff: 0.009999999999999998
          kl: 0.009123376068954058
          policy_loss: -0.09327243483728832
          total_loss: -0.09432088161508242
          vf_explained_var: -0.6094356775283813
          vf_loss: 0.003688075797011455
    num_agent_steps_sampled: 1392000
    num_agent_steps_trained: 1392000
    num_steps_sampled: 1392000
    num_steps_trained: 1392000
  iterations_since_restore: 139

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1392,34583.1,1392000,-0.52,0,-13,331.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1393000
  custom_metrics: {}
  date: 2021-10-09_08-01-21
  done: false
  episode_len_mean: 331.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3887
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8961803714434307
          entropy_coeff: 0.009999999999999998
          kl: 0.009832557318403466
          policy_loss: -0.09870841051969263
          total_loss: -0.10291565222044786
          vf_explained_var: -0.5846840143203735
          vf_loss: 0.001982772924626867
    num_agent_steps_sampled: 1393000
    num_agent_steps_trained: 1393000
    num_steps_sampled: 1393000
    num_steps_trained: 1393000
  iterations_since_restore: 139

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1393,34609.3,1393000,-0.52,0,-13,331.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1394000
  custom_metrics: {}
  date: 2021-10-09_08-01-45
  done: false
  episode_len_mean: 332.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3890
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6426717652214897
          entropy_coeff: 0.009999999999999998
          kl: 0.008296491358008515
          policy_loss: -0.04101217930308647
          total_loss: -0.04489531387678451
          vf_explained_var: -1.0
          vf_loss: 0.0017670355957104928
    num_agent_steps_sampled: 1394000
    num_agent_steps_trained: 1394000
    num_steps_sampled: 1394000
    num_steps_trained: 1394000
  iterations_since_restore: 1394
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1394,34632.9,1394000,-0.52,0,-13,332.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1395000
  custom_metrics: {}
  date: 2021-10-09_08-02-10
  done: false
  episode_len_mean: 331.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3893
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6982322984271578
          entropy_coeff: 0.009999999999999998
          kl: 0.006675019144937247
          policy_loss: -0.12446733539303144
          total_loss: -0.12965650202499496
          vf_explained_var: -0.4357527494430542
          vf_loss: 0.003122787973067413
    num_agent_steps_sampled: 1395000
    num_agent_steps_trained: 1395000
    num_steps_sampled: 1395000
    num_steps_trained: 1395000
  iterations_since_restore: 139

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1395,34657.8,1395000,-0.52,0,-13,331.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1396000
  custom_metrics: {}
  date: 2021-10-09_08-02-35
  done: false
  episode_len_mean: 330.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3896
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5956784897380405
          entropy_coeff: 0.009999999999999998
          kl: 0.009933881260593818
          policy_loss: -0.076305929157469
          total_loss: -0.07713773440983561
          vf_explained_var: -0.7932291626930237
          vf_loss: 0.0022215780730928396
    num_agent_steps_sampled: 1396000
    num_agent_steps_trained: 1396000
    num_steps_sampled: 1396000
    num_steps_trained: 1396000
  iterations_since_restore: 1396


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1396,34683.2,1396000,-0.52,0,-13,330.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1397000
  custom_metrics: {}
  date: 2021-10-09_08-02-58
  done: false
  episode_len_mean: 330.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3899
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7325030896398756
          entropy_coeff: 0.009999999999999998
          kl: 0.009797461387463575
          policy_loss: -0.06616119721697436
          total_loss: -0.06889306613140636
          vf_explained_var: -0.8985707759857178
          vf_loss: 0.0018669601102980474
    num_agent_steps_sampled: 1397000
    num_agent_steps_trained: 1397000
    num_steps_sampled: 1397000
    num_steps_trained: 1397000
  iterations_since_restore: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1397,34706.4,1397000,-0.52,0,-13,330.21




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1398000
  custom_metrics: {}
  date: 2021-10-09_08-03-39
  done: false
  episode_len_mean: 330.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3902
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6789616664250693
          entropy_coeff: 0.009999999999999998
          kl: 0.010582951278665891
          policy_loss: -0.06908652356101407
          total_loss: -0.07050285012357765
          vf_explained_var: -0.7768049240112305
          vf_loss: 0.0016267932369373739
    num_agent_steps_sampled: 1398000
    num_agent_steps_trained: 1398000
    num_steps_sampled: 1398000
    num_steps_trained: 1398000
  iterations_since_restore: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1398,34746.9,1398000,-0.52,0,-13,330.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1399000
  custom_metrics: {}
  date: 2021-10-09_08-04-03
  done: false
  episode_len_mean: 331.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3905
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 2.0116803619596695
          entropy_coeff: 0.009999999999999998
          kl: 0.00849392649714916
          policy_loss: -0.12985557123190827
          total_loss: -0.1371893619497617
          vf_explained_var: -0.7786522507667542
          vf_loss: 0.00175001092074025
    num_agent_steps_sampled: 1399000
    num_agent_steps_trained: 1399000
    num_steps_sampled: 1399000
    num_steps_trained: 1399000
  iterations_since_restore: 1399
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1399,34771.4,1399000,-0.52,0,-13,331.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1400000
  custom_metrics: {}
  date: 2021-10-09_08-04-30
  done: false
  episode_len_mean: 331.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3908
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 2.0209786918428208
          entropy_coeff: 0.009999999999999998
          kl: 0.007859440485093946
          policy_loss: -0.05409296262595389
          total_loss: -0.06300407364550564
          vf_explained_var: -0.9998829960823059
          vf_loss: 0.0010898268047539103
    num_agent_steps_sampled: 1400000
    num_agent_steps_trained: 1400000
    num_steps_sampled: 1400000
    num_steps_trained: 1400000
  iterations_since_restore: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1400,34797.7,1400000,-0.52,0,-13,331.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1401000
  custom_metrics: {}
  date: 2021-10-09_08-04-57
  done: false
  episode_len_mean: 331.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3911
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.713352182176378
          entropy_coeff: 0.009999999999999998
          kl: 0.010210812612233788
          policy_loss: -0.09390209190961388
          total_loss: -0.09668491116414467
          vf_explained_var: -0.4481731951236725
          vf_loss: 0.0010875903163752002
    num_agent_steps_sampled: 1401000
    num_agent_steps_trained: 1401000
    num_steps_sampled: 1401000
    num_steps_trained: 1401000
  iterations_since_restore: 140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1401,34824.6,1401000,-0.52,0,-13,331.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1402000
  custom_metrics: {}
  date: 2021-10-09_08-05-23
  done: false
  episode_len_mean: 331.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3914
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.9089419272210864
          entropy_coeff: 0.009999999999999998
          kl: 0.010102048182599037
          policy_loss: -0.1118590620242887
          total_loss: -0.11655115890834067
          vf_explained_var: -0.5537129640579224
          vf_loss: 0.001275484297528035
    num_agent_steps_sampled: 1402000
    num_agent_steps_trained: 1402000
    num_steps_sampled: 1402000
    num_steps_trained: 1402000
  iterations_since_restore: 1402

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1402,34851.2,1402000,-0.52,0,-13,331.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1403000
  custom_metrics: {}
  date: 2021-10-09_08-05-49
  done: false
  episode_len_mean: 331.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3917
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.751744102107154
          entropy_coeff: 0.009999999999999998
          kl: 0.008486043339709563
          policy_loss: -0.091175673627812
          total_loss: -0.09620197953449355
          vf_explained_var: -1.0
          vf_loss: 0.0014683705515279952
    num_agent_steps_sampled: 1403000
    num_agent_steps_trained: 1403000
    num_steps_sampled: 1403000
    num_steps_trained: 1403000
  iterations_since_restore: 1403
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1403,34876.5,1403000,-0.52,0,-13,331.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1404000
  custom_metrics: {}
  date: 2021-10-09_08-06-16
  done: false
  episode_len_mean: 330.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3920
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7655409031444125
          entropy_coeff: 0.009999999999999998
          kl: 0.006872486520298359
          policy_loss: -0.13760591758829024
          total_loss: -0.14587822287446922
          vf_explained_var: -1.0
          vf_loss: 0.00045623413510232545
    num_agent_steps_sampled: 1404000
    num_agent_steps_trained: 1404000
    num_steps_sampled: 1404000
    num_steps_trained: 1404000
  iterations_since_restore: 1404
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1404,34904.1,1404000,-0.52,0,-13,330.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1405000
  custom_metrics: {}
  date: 2021-10-09_08-06-42
  done: false
  episode_len_mean: 330.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3923
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6286846107906765
          entropy_coeff: 0.009999999999999998
          kl: 0.008397623021504054
          policy_loss: -0.05122145791020658
          total_loss: -0.05525982425444656
          vf_explained_var: -0.9905436038970947
          vf_loss: 0.0013405707409320812
    num_agent_steps_sampled: 1405000
    num_agent_steps_trained: 1405000
    num_steps_sampled: 1405000
    num_steps_trained: 1405000
  iterations_since_restore: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1405,34929.9,1405000,-0.52,0,-13,330.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1406000
  custom_metrics: {}
  date: 2021-10-09_08-07-07
  done: false
  episode_len_mean: 331.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.52
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 3926
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8104284127553305
          entropy_coeff: 0.009999999999999998
          kl: 0.011039644163018872
          policy_loss: -0.10824798776043786
          total_loss: -0.11074583565609322
          vf_explained_var: -1.0
          vf_loss: 0.0012667292405644224
    num_agent_steps_sampled: 1406000
    num_agent_steps_trained: 1406000
    num_steps_sampled: 1406000
    num_steps_trained: 1406000
  iterations_since_restore: 1406
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1406,34954.7,1406000,-0.52,0,-13,331.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1407000
  custom_metrics: {}
  date: 2021-10-09_08-07-34
  done: false
  episode_len_mean: 330.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.39
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3929
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.743067193031311
          entropy_coeff: 0.009999999999999998
          kl: 0.011299759140941508
          policy_loss: -0.10574444768329462
          total_loss: -0.10794506996042198
          vf_explained_var: -0.6134316921234131
          vf_loss: 0.0005524722293355606
    num_agent_steps_sampled: 1407000
    num_agent_steps_trained: 1407000
    num_steps_sampled: 1407000
    num_steps_trained: 1407000
  iterations_since_restore: 140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1407,34982.2,1407000,-0.39,0,-12,330.17




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1408000
  custom_metrics: {}
  date: 2021-10-09_08-08-19
  done: false
  episode_len_mean: 329.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.39
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3932
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6867113682958814
          entropy_coeff: 0.009999999999999998
          kl: 0.010169474136292836
          policy_loss: -0.11919915897564756
          total_loss: -0.1222729220572445
          vf_explained_var: -0.78398197889328
          vf_loss: 0.0005839310075518571
    num_agent_steps_sampled: 1408000
    num_agent_steps_trained: 1408000
    num_steps_sampled: 1408000
    num_steps_trained: 1408000
  iterations_since_restore: 1408


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1408,35026.5,1408000,-0.39,0,-12,329.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1409000
  custom_metrics: {}
  date: 2021-10-09_08-08-46
  done: false
  episode_len_mean: 328.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.27
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 3935
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.569142480691274
          entropy_coeff: 0.009999999999999998
          kl: 0.007396167547285238
          policy_loss: 0.004682170620395078
          total_loss: -0.0008673512066404025
          vf_explained_var: -0.20079536736011505
          vf_loss: 0.0005348108873780196
    num_agent_steps_sampled: 1409000
    num_agent_steps_trained: 1409000
    num_steps_sampled: 1409000
    num_steps_trained: 1409000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1409,35054.1,1409000,-0.27,0,-12,328.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1410000
  custom_metrics: {}
  date: 2021-10-09_08-09-11
  done: false
  episode_len_mean: 327.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 3938
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7937182638380262
          entropy_coeff: 0.009999999999999998
          kl: 0.011021090688341771
          policy_loss: -0.08354618524511655
          total_loss: -0.08639054567449622
          vf_explained_var: -0.5450742840766907
          vf_loss: 0.0007772147089579246
    num_agent_steps_sampled: 1410000
    num_agent_steps_trained: 1410000
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
  iterations_since_restore: 141

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1410,35079.2,1410000,-0.08,0,-5,327.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1411000
  custom_metrics: {}
  date: 2021-10-09_08-09-40
  done: false
  episode_len_mean: 325.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 4
  episodes_total: 3942
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4564139975441828
          entropy_coeff: 0.009999999999999998
          kl: 0.00893865797828182
          policy_loss: -0.05608870691309373
          total_loss: -0.058356623641318744
          vf_explained_var: -0.44715145230293274
          vf_loss: 0.0006855451335367333
    num_agent_steps_sampled: 1411000
    num_agent_steps_trained: 1411000
    num_steps_sampled: 1411000
    num_steps_trained: 1411000
  iterations_since_restore: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1411,35107.6,1411000,-0.05,0,-5,325.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1412000
  custom_metrics: {}
  date: 2021-10-09_08-10-09
  done: false
  episode_len_mean: 324.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 3945
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.3991058309872946
          entropy_coeff: 0.009999999999999998
          kl: 0.012276853072495363
          policy_loss: -0.016598096324337855
          total_loss: 0.18498506140377785
          vf_explained_var: -0.47037166357040405
          vf_loss: 0.19962746690467206
    num_agent_steps_sampled: 1412000
    num_agent_steps_trained: 1412000
    num_steps_sampled: 1412000
    num_steps_trained: 1412000
  iterations_since_restore: 1412

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1412,35137.1,1412000,-0.09,0,-5,324.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1413000
  custom_metrics: {}
  date: 2021-10-09_08-10-36
  done: false
  episode_len_mean: 323.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 4
  episodes_total: 3949
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8083530929353502
          entropy_coeff: 0.009999999999999998
          kl: 0.006987029573418507
          policy_loss: -0.05747825565437476
          total_loss: -0.05869740806519985
          vf_explained_var: 0.1081056073307991
          vf_loss: 0.007788725996700426
    num_agent_steps_sampled: 1413000
    num_agent_steps_trained: 1413000
    num_steps_sampled: 1413000
    num_steps_trained: 1413000
  iterations_since_restore: 1413


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1413,35163.9,1413000,-0.09,0,-5,323.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1414000
  custom_metrics: {}
  date: 2021-10-09_08-11-00
  done: false
  episode_len_mean: 324.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 3952
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6861519032054477
          entropy_coeff: 0.009999999999999998
          kl: 0.011600430402076997
          policy_loss: -0.029642667455805672
          total_loss: -0.024988862540986805
          vf_explained_var: -0.08672528713941574
          vf_loss: 0.006447196560394433
    num_agent_steps_sampled: 1414000
    num_agent_steps_trained: 1414000
    num_steps_sampled: 1414000
    num_steps_trained: 1414000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1414,35188.2,1414000,-0.09,0,-5,324.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1415000
  custom_metrics: {}
  date: 2021-10-09_08-11-27
  done: false
  episode_len_mean: 324.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 3955
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7629597451951768
          entropy_coeff: 0.009999999999999998
          kl: 0.012028753154068955
          policy_loss: -0.09069816294229693
          total_loss: -0.08358900356623862
          vf_explained_var: -0.45636656880378723
          vf_loss: 0.00911426742354201
    num_agent_steps_sampled: 1415000
    num_agent_steps_trained: 1415000
    num_steps_sampled: 1415000
    num_steps_trained: 1415000
  iterations_since_restore: 1415

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1415,35214.2,1415000,-0.09,0,-5,324.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1416000
  custom_metrics: {}
  date: 2021-10-09_08-11-53
  done: false
  episode_len_mean: 325.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 3958
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.554686849647098
          entropy_coeff: 0.009999999999999998
          kl: 0.009738538624526743
          policy_loss: -0.06881251622301837
          total_loss: -0.06734224531489114
          vf_explained_var: -0.11997252702713013
          vf_loss: 0.004367475408232875
    num_agent_steps_sampled: 1416000
    num_agent_steps_trained: 1416000
    num_steps_sampled: 1416000
    num_steps_trained: 1416000
  iterations_since_restore: 1416

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1416,35240.9,1416000,-0.09,0,-5,325.03




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1417000
  custom_metrics: {}
  date: 2021-10-09_08-12-37
  done: false
  episode_len_mean: 323.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3961
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.809933488898807
          entropy_coeff: 0.009999999999999998
          kl: 0.010943173266492244
          policy_loss: -0.007050191838708189
          total_loss: -0.005836730657352342
          vf_explained_var: 0.18593841791152954
          vf_loss: 0.005098394951265719
    num_agent_steps_sampled: 1417000
    num_agent_steps_trained: 1417000
    num_steps_sampled: 1417000
    num_steps_trained: 1417000
  iterations_since_restore: 141

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1417,35284.5,1417000,-0.04,0,-4,323.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1418000
  custom_metrics: {}
  date: 2021-10-09_08-13-03
  done: false
  episode_len_mean: 323.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3964
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8554632875654433
          entropy_coeff: 0.009999999999999998
          kl: 0.010314381866405547
          policy_loss: -0.048093656616078485
          total_loss: -0.051718149251408045
          vf_explained_var: -0.24802984297275543
          vf_loss: 0.001532498523334248
    num_agent_steps_sampled: 1418000
    num_agent_steps_trained: 1418000
    num_steps_sampled: 1418000
    num_steps_trained: 1418000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1418,35310.4,1418000,-0.04,0,-4,323.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1419000
  custom_metrics: {}
  date: 2021-10-09_08-13-27
  done: false
  episode_len_mean: 325.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3967
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8148384941948785
          entropy_coeff: 0.009999999999999998
          kl: 0.008267005134743894
          policy_loss: -0.0854817542143994
          total_loss: -0.09030707341929277
          vf_explained_var: -1.0
          vf_loss: 0.002584818691118724
    num_agent_steps_sampled: 1419000
    num_agent_steps_trained: 1419000
    num_steps_sampled: 1419000
    num_steps_trained: 1419000
  iterations_since_restore: 1419
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1419,35334.5,1419000,-0.04,0,-4,325.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1420000
  custom_metrics: {}
  date: 2021-10-09_08-13-53
  done: false
  episode_len_mean: 325.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 4
  episodes_total: 3971
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7977524691157871
          entropy_coeff: 0.009999999999999998
          kl: 0.008890763464599872
          policy_loss: -0.06274244392083751
          total_loss: -0.0677197280443377
          vf_explained_var: -0.1323194056749344
          vf_loss: 0.0014517743282744455
    num_agent_steps_sampled: 1420000
    num_agent_steps_trained: 1420000
    num_steps_sampled: 1420000
    num_steps_trained: 1420000
  iterations_since_restore: 1420

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1420,35361,1420000,-0.04,0,-4,325.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1421000
  custom_metrics: {}
  date: 2021-10-09_08-14-20
  done: false
  episode_len_mean: 325.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3974
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7663263188468086
          entropy_coeff: 0.009999999999999998
          kl: 0.010719198904887851
          policy_loss: -0.05431103689803018
          total_loss: -0.05637753560311264
          vf_explained_var: -0.5724488496780396
          vf_loss: 0.0016732923169102934
    num_agent_steps_sampled: 1421000
    num_agent_steps_trained: 1421000
    num_steps_sampled: 1421000
    num_steps_trained: 1421000
  iterations_since_restore: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1421,35387.2,1421000,-0.04,0,-4,325.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1422000
  custom_metrics: {}
  date: 2021-10-09_08-14-45
  done: false
  episode_len_mean: 324.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3977
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.9033050404654608
          entropy_coeff: 0.009999999999999998
          kl: 0.008008157420748358
          policy_loss: -0.0436329061165452
          total_loss: -0.05077449255105522
          vf_explained_var: -1.0
          vf_loss: 0.001489439856312755
    num_agent_steps_sampled: 1422000
    num_agent_steps_trained: 1422000
    num_steps_sampled: 1422000
    num_steps_trained: 1422000
  iterations_since_restore: 1422
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1422,35412.5,1422000,-0.04,0,-4,324.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1423000
  custom_metrics: {}
  date: 2021-10-09_08-15-12
  done: false
  episode_len_mean: 323.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3980
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6220776518185933
          entropy_coeff: 0.009999999999999998
          kl: 0.009559782949853771
          policy_loss: -0.0563974991440773
          total_loss: -0.05931104645133019
          vf_explained_var: -0.30697137117385864
          vf_loss: 0.000889753460069187
    num_agent_steps_sampled: 1423000
    num_agent_steps_trained: 1423000
    num_steps_sampled: 1423000
    num_steps_trained: 1423000
  iterations_since_restore: 1423

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1423,35439.6,1423000,-0.04,0,-4,323.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1424000
  custom_metrics: {}
  date: 2021-10-09_08-15-38
  done: false
  episode_len_mean: 322.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3983
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.686174844370948
          entropy_coeff: 0.009999999999999998
          kl: 0.01116310736088144
          policy_loss: -0.0534182322728965
          total_loss: -0.05499178388466438
          vf_explained_var: -0.36222344636917114
          vf_loss: 0.0007881220553018567
    num_agent_steps_sampled: 1424000
    num_agent_steps_trained: 1424000
    num_steps_sampled: 1424000
    num_steps_trained: 1424000
  iterations_since_restore: 1424


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1424,35465.4,1424000,-0.04,0,-4,322.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1425000
  custom_metrics: {}
  date: 2021-10-09_08-16-05
  done: false
  episode_len_mean: 322.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3986
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5844772140185037
          entropy_coeff: 0.009999999999999998
          kl: 0.007134069888088825
          policy_loss: -0.10015353585282961
          total_loss: -0.10598787739872932
          vf_explained_var: -0.2593197822570801
          vf_loss: 0.0007437840210817134
    num_agent_steps_sampled: 1425000
    num_agent_steps_trained: 1425000
    num_steps_sampled: 1425000
    num_steps_trained: 1425000
  iterations_since_restore: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1425,35492.5,1425000,-0.04,0,-4,322.33




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1426000
  custom_metrics: {}
  date: 2021-10-09_08-16-46
  done: false
  episode_len_mean: 322.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 4
  episodes_total: 3990
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8507301727930705
          entropy_coeff: 0.009999999999999998
          kl: 0.0088200348328994
          policy_loss: -0.10470810135205587
          total_loss: -0.11053448774748378
          vf_explained_var: -0.9134117364883423
          vf_loss: 0.0012243172625959334
    num_agent_steps_sampled: 1426000
    num_agent_steps_trained: 1426000
    num_steps_sampled: 1426000
    num_steps_trained: 1426000
  iterations_since_restore: 1426


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1426,35533.8,1426000,-0.04,0,-4,322.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1427000
  custom_metrics: {}
  date: 2021-10-09_08-17-14
  done: false
  episode_len_mean: 321.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3993
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.591317433781094
          entropy_coeff: 0.009999999999999998
          kl: 0.007788804460990138
          policy_loss: -0.054785747132781476
          total_loss: -0.05989995549122493
          vf_explained_var: -0.9418160915374756
          vf_loss: 0.0006818655852435364
    num_agent_steps_sampled: 1427000
    num_agent_steps_trained: 1427000
    num_steps_sampled: 1427000
    num_steps_trained: 1427000
  iterations_since_restore: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1427,35561.7,1427000,-0.04,0,-4,321.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1428000
  custom_metrics: {}
  date: 2021-10-09_08-17-38
  done: false
  episode_len_mean: 322.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3996
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.9087682604789733
          entropy_coeff: 0.009999999999999998
          kl: 0.009650232550629075
          policy_loss: -0.07885324108517833
          total_loss: -0.08438255480594105
          vf_explained_var: -0.6928936839103699
          vf_loss: 0.0010234062346474579
    num_agent_steps_sampled: 1428000
    num_agent_steps_trained: 1428000
    num_steps_sampled: 1428000
    num_steps_trained: 1428000
  iterations_since_restore: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1428,35585.5,1428000,-0.04,0,-4,322.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1429000
  custom_metrics: {}
  date: 2021-10-09_08-18-03
  done: false
  episode_len_mean: 321.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 3999
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.9324620326360067
          entropy_coeff: 0.009999999999999998
          kl: 0.009659037401309512
          policy_loss: -0.0400990587969621
          total_loss: -0.04571578262580766
          vf_explained_var: -0.7992057800292969
          vf_loss: 0.0011614997994102952
    num_agent_steps_sampled: 1429000
    num_agent_steps_trained: 1429000
    num_steps_sampled: 1429000
    num_steps_trained: 1429000
  iterations_since_restore: 1429

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1429,35610.9,1429000,-0.04,0,-4,321.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1430000
  custom_metrics: {}
  date: 2021-10-09_08-18-29
  done: false
  episode_len_mean: 321.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4002
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5594912158118355
          entropy_coeff: 0.009999999999999998
          kl: 0.011017304938548357
          policy_loss: -0.059389551356434825
          total_loss: -0.05960283209052351
          vf_explained_var: -0.11241816729307175
          vf_loss: 0.001070943502579919
    num_agent_steps_sampled: 1430000
    num_agent_steps_trained: 1430000
    num_steps_sampled: 1430000
    num_steps_trained: 1430000
  iterations_since_restore: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1430,35636,1430000,-0.04,0,-4,321.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1431000
  custom_metrics: {}
  date: 2021-10-09_08-18-56
  done: false
  episode_len_mean: 319.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4005
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.622116005420685
          entropy_coeff: 0.009999999999999998
          kl: 0.01028768029997309
          policy_loss: -0.005857816048794323
          total_loss: -0.008029353163308568
          vf_explained_var: 0.3217613995075226
          vf_loss: 0.000686662402909456
    num_agent_steps_sampled: 1431000
    num_agent_steps_trained: 1431000
    num_steps_sampled: 1431000
    num_steps_trained: 1431000
  iterations_since_restore: 1431


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1431,35663.2,1431000,-0.04,0,-4,319.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1432000
  custom_metrics: {}
  date: 2021-10-09_08-19-18
  done: false
  episode_len_mean: 319.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4008
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6316240628560383
          entropy_coeff: 0.009999999999999998
          kl: 0.009733328828715512
          policy_loss: -0.14872165218823485
          total_loss: -0.15167964949376053
          vf_explained_var: -0.5068345665931702
          vf_loss: 0.0007153480637094213
    num_agent_steps_sampled: 1432000
    num_agent_steps_trained: 1432000
    num_steps_sampled: 1432000
    num_steps_trained: 1432000
  iterations_since_restore: 143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1432,35685,1432000,-0.04,0,-4,319.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1433000
  custom_metrics: {}
  date: 2021-10-09_08-19-43
  done: false
  episode_len_mean: 320.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4011
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8703961491584777
          entropy_coeff: 0.009999999999999998
          kl: 0.010462550817027171
          policy_loss: -0.1454966351389885
          total_loss: -0.1499888496266471
          vf_explained_var: -0.5990177989006042
          vf_loss: 0.0006216403041940389
    num_agent_steps_sampled: 1433000
    num_agent_steps_trained: 1433000
    num_steps_sampled: 1433000
    num_steps_trained: 1433000
  iterations_since_restore: 1433


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1433,35709.8,1433000,-0.04,0,-4,320.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1434000
  custom_metrics: {}
  date: 2021-10-09_08-20-07
  done: false
  episode_len_mean: 320.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4014
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6084579865137736
          entropy_coeff: 0.009999999999999998
          kl: 0.009317742361738447
          policy_loss: -0.08348282641834683
          total_loss: -0.08702660128474235
          vf_explained_var: -0.6313092708587646
          vf_loss: 0.00043772274899917343
    num_agent_steps_sampled: 1434000
    num_agent_steps_trained: 1434000
    num_steps_sampled: 1434000
    num_steps_trained: 1434000
  iterations_since_restore: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1434,35734.5,1434000,-0.04,0,-4,320.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1435000
  custom_metrics: {}
  date: 2021-10-09_08-20-33
  done: false
  episode_len_mean: 319.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4017
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.547780446211497
          entropy_coeff: 0.009999999999999998
          kl: 0.00797789714833339
          policy_loss: -0.014963378467493587
          total_loss: -0.019563988885945745
          vf_explained_var: -0.3665456175804138
          vf_loss: 0.0005144761687713779
    num_agent_steps_sampled: 1435000
    num_agent_steps_trained: 1435000
    num_steps_sampled: 1435000
    num_steps_trained: 1435000
  iterations_since_restore: 143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1435,35760.2,1435000,-0.04,0,-4,319.44




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1436000
  custom_metrics: {}
  date: 2021-10-09_08-21-18
  done: false
  episode_len_mean: 318.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4020
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.3252947515911526
          entropy_coeff: 0.009999999999999998
          kl: 0.009428893708397328
          policy_loss: -0.07180959153920412
          total_loss: -0.07240049569971031
          vf_explained_var: -0.276123583316803
          vf_loss: 0.00041458473262739263
    num_agent_steps_sampled: 1436000
    num_agent_steps_trained: 1436000
    num_steps_sampled: 1436000
    num_steps_trained: 1436000
  iterations_since_restore: 143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1436,35805.1,1436000,-0.04,0,-4,318.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1437000
  custom_metrics: {}
  date: 2021-10-09_08-21-44
  done: false
  episode_len_mean: 317.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 4
  episodes_total: 4024
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6147990571127997
          entropy_coeff: 0.009999999999999998
          kl: 0.013273278131264535
          policy_loss: -0.09612467793954743
          total_loss: -0.0944941119187408
          vf_explained_var: 0.26859593391418457
          vf_loss: 0.0005375187759960277
    num_agent_steps_sampled: 1437000
    num_agent_steps_trained: 1437000
    num_steps_sampled: 1437000
    num_steps_trained: 1437000
  iterations_since_restore: 1437

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1437,35831.3,1437000,-0.04,0,-4,317.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1438000
  custom_metrics: {}
  date: 2021-10-09_08-22-09
  done: false
  episode_len_mean: 317.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4027
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.9090597497092352
          entropy_coeff: 0.009999999999999998
          kl: 0.011392904144442894
          policy_loss: -0.11700953789469269
          total_loss: -0.12075422323412366
          vf_explained_var: -0.7414988279342651
          vf_loss: 0.0005473441441103609
    num_agent_steps_sampled: 1438000
    num_agent_steps_trained: 1438000
    num_steps_sampled: 1438000
    num_steps_trained: 1438000
  iterations_since_restore: 143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1438,35856.4,1438000,-0.04,0,-4,317.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1439000
  custom_metrics: {}
  date: 2021-10-09_08-22-34
  done: false
  episode_len_mean: 317.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4030
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6521522018644546
          entropy_coeff: 0.009999999999999998
          kl: 0.008558630614551957
          policy_loss: -0.09850524713595708
          total_loss: -0.1036050592859586
          vf_explained_var: -0.050828319042921066
          vf_loss: 0.00030466191528830675
    num_agent_steps_sampled: 1439000
    num_agent_steps_trained: 1439000
    num_steps_sampled: 1439000
    num_steps_trained: 1439000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1439,35881.6,1439000,-0.04,0,-4,317.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1440000
  custom_metrics: {}
  date: 2021-10-09_08-22-59
  done: false
  episode_len_mean: 318.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4033
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8296372029516432
          entropy_coeff: 0.009999999999999998
          kl: 0.009441002636659777
          policy_loss: -0.07614347157585952
          total_loss: -0.08167949571377701
          vf_explained_var: -0.880757212638855
          vf_loss: 0.0004971618874050263
    num_agent_steps_sampled: 1440000
    num_agent_steps_trained: 1440000
    num_steps_sampled: 1440000
    num_steps_trained: 1440000
  iterations_since_restore: 1440

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1440,35905.8,1440000,-0.04,0,-4,318.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1441000
  custom_metrics: {}
  date: 2021-10-09_08-23-25
  done: false
  episode_len_mean: 318.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4036
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8541026009453667
          entropy_coeff: 0.009999999999999998
          kl: 0.010025389800037171
          policy_loss: -0.10456372315271033
          total_loss: -0.10974648280276192
          vf_explained_var: -0.5886415839195251
          vf_loss: 0.0003359993851821249
    num_agent_steps_sampled: 1441000
    num_agent_steps_trained: 1441000
    num_steps_sampled: 1441000
    num_steps_trained: 1441000
  iterations_since_restore: 144

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1441,35931.8,1441000,-0.04,0,-4,318.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1442000
  custom_metrics: {}
  date: 2021-10-09_08-23-46
  done: false
  episode_len_mean: 320.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 4038
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.855685826142629
          entropy_coeff: 0.009999999999999998
          kl: 0.009784464520551199
          policy_loss: -0.1499512893251247
          total_loss: -0.15543081412712734
          vf_explained_var: -0.6773121953010559
          vf_loss: 0.00036801287222058615
    num_agent_steps_sampled: 1442000
    num_agent_steps_trained: 1442000
    num_steps_sampled: 1442000
    num_steps_trained: 1442000
  iterations_since_restore: 1442

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1442,35952.9,1442000,-0.04,0,-4,320.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1443000
  custom_metrics: {}
  date: 2021-10-09_08-24-09
  done: false
  episode_len_mean: 322.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 4041
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8590976688596936
          entropy_coeff: 0.009999999999999998
          kl: 0.010056421478760017
          policy_loss: -0.019601042527291508
          total_loss: -0.024914947545362845
          vf_explained_var: -0.4970591962337494
          vf_loss: 0.00021450027513007322
    num_agent_steps_sampled: 1443000
    num_agent_steps_trained: 1443000
    num_steps_sampled: 1443000
    num_steps_trained: 1443000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1443,35975.7,1443000,-0.04,0,-4,322.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1444000
  custom_metrics: {}
  date: 2021-10-09_08-24-29
  done: false
  episode_len_mean: 324.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 4043
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6847547027799818
          entropy_coeff: 0.009999999999999998
          kl: 0.00979476696320055
          policy_loss: -0.12485029018587536
          total_loss: -0.12856055854095352
          vf_explained_var: -0.3171654939651489
          vf_loss: 0.00041457897491960064
    num_agent_steps_sampled: 1444000
    num_agent_steps_trained: 1444000
    num_steps_sampled: 1444000
    num_steps_trained: 1444000
  iterations_since_restore: 1444

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1444,35996.4,1444000,-0.04,0,-4,324.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1445000
  custom_metrics: {}
  date: 2021-10-09_08-24-52
  done: false
  episode_len_mean: 328.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4046
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7603889690505135
          entropy_coeff: 0.009999999999999998
          kl: 0.011078814467078571
          policy_loss: -0.03460464974244436
          total_loss: -0.03746958615051375
          vf_explained_var: -0.4376469850540161
          vf_loss: 0.00034836663592917224
    num_agent_steps_sampled: 1445000
    num_agent_steps_trained: 1445000
    num_steps_sampled: 1445000
    num_steps_trained: 1445000
  iterations_since_restore: 1445


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1445,36018.7,1445000,0,0,0,328.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1446000
  custom_metrics: {}
  date: 2021-10-09_08-25-15
  done: false
  episode_len_mean: 329.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4049
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.641802614265018
          entropy_coeff: 0.009999999999999998
          kl: 0.007814402320179149
          policy_loss: -0.08348459533105294
          total_loss: -0.08937745199849208
          vf_explained_var: -0.20097146928310394
          vf_loss: 0.0003748184783034958
    num_agent_steps_sampled: 1446000
    num_agent_steps_trained: 1446000
    num_steps_sampled: 1446000
    num_steps_trained: 1446000
  iterations_since_restore: 1446
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1446,36041.9,1446000,0,0,0,329.77




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1447000
  custom_metrics: {}
  date: 2021-10-09_08-25-57
  done: false
  episode_len_mean: 329.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4052
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.830512441529168
          entropy_coeff: 0.009999999999999998
          kl: 0.00972283254780481
          policy_loss: -0.06832577515807417
          total_loss: -0.07377674409912692
          vf_explained_var: -0.6518183946609497
          vf_loss: 0.00022489172042696736
    num_agent_steps_sampled: 1447000
    num_agent_steps_trained: 1447000
    num_steps_sampled: 1447000
    num_steps_trained: 1447000
  iterations_since_restore: 1447
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1447,36083.8,1447000,0,0,0,329.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1448000
  custom_metrics: {}
  date: 2021-10-09_08-26-22
  done: false
  episode_len_mean: 330.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4055
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7551002595159741
          entropy_coeff: 0.009999999999999998
          kl: 0.010585126777774987
          policy_loss: -0.038654857294427025
          total_loss: -0.042293008375498986
          vf_explained_var: -0.5496025681495667
          vf_loss: 0.00016352712959309833
    num_agent_steps_sampled: 1448000
    num_agent_steps_trained: 1448000
    num_steps_sampled: 1448000
    num_steps_trained: 1448000
  iterations_since_restore: 144

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1448,36108.7,1448000,0,0,0,330.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1449000
  custom_metrics: {}
  date: 2021-10-09_08-26-49
  done: false
  episode_len_mean: 330.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4058
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7538448519176908
          entropy_coeff: 0.009999999999999998
          kl: 0.01101981644519804
          policy_loss: -0.1307016952584187
          total_loss: -0.1337781737248103
          vf_explained_var: -0.6228090524673462
          vf_loss: 0.00014802102264689488
    num_agent_steps_sampled: 1449000
    num_agent_steps_trained: 1449000
    num_steps_sampled: 1449000
    num_steps_trained: 1449000
  iterations_since_restore: 1449
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1449,36136.2,1449000,0,0,0,330.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1450000
  custom_metrics: {}
  date: 2021-10-09_08-27-15
  done: false
  episode_len_mean: 331.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4061
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5910064895947775
          entropy_coeff: 0.009999999999999998
          kl: 0.007692197308319177
          policy_loss: -0.0310788716086083
          total_loss: -0.00612893116970857
          vf_explained_var: -0.42780452966690063
          vf_loss: 0.030868388627034924
    num_agent_steps_sampled: 1450000
    num_agent_steps_trained: 1450000
    num_steps_sampled: 1450000
    num_steps_trained: 1450000
  iterations_since_restore: 1450

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1450,36161.7,1450000,-0.01,0,-1,331.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1451000
  custom_metrics: {}
  date: 2021-10-09_08-27-41
  done: false
  episode_len_mean: 331.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4064
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8115773638089498
          entropy_coeff: 0.009999999999999998
          kl: 0.00875121810546836
          policy_loss: -0.02770948490748803
          total_loss: -0.031819904471437135
          vf_explained_var: -0.23920953273773193
          vf_loss: 0.0026381460471182235
    num_agent_steps_sampled: 1451000
    num_agent_steps_trained: 1451000
    num_steps_sampled: 1451000
    num_steps_trained: 1451000
  iterations_since_restore: 145

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1451,36187.8,1451000,-0.01,0,-1,331.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1452000
  custom_metrics: {}
  date: 2021-10-09_08-28-08
  done: false
  episode_len_mean: 330.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4067
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.409499462445577
          entropy_coeff: 0.009999999999999998
          kl: 0.0070145253327894294
          policy_loss: -0.09982301021615664
          total_loss: -0.10388829343848759
          vf_explained_var: -0.3420621156692505
          vf_loss: 0.0009183443182135104
    num_agent_steps_sampled: 1452000
    num_agent_steps_trained: 1452000
    num_steps_sampled: 1452000
    num_steps_trained: 1452000
  iterations_since_restore: 145

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1452,36215.2,1452000,-0.01,0,-1,330.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1453000
  custom_metrics: {}
  date: 2021-10-09_08-28-31
  done: false
  episode_len_mean: 331.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4070
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8401473177803886
          entropy_coeff: 0.009999999999999998
          kl: 0.009156795905487295
          policy_loss: -0.1347379775510894
          total_loss: -0.14008651487529278
          vf_explained_var: -0.7245979309082031
          vf_loss: 0.001158914135562049
    num_agent_steps_sampled: 1453000
    num_agent_steps_trained: 1453000
    num_steps_sampled: 1453000
    num_steps_trained: 1453000
  iterations_since_restore: 1453


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1453,36238.4,1453000,-0.01,0,-1,331.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1454000
  custom_metrics: {}
  date: 2021-10-09_08-28-59
  done: false
  episode_len_mean: 332.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4073
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.776958085430993
          entropy_coeff: 0.009999999999999998
          kl: 0.008944996060876755
          policy_loss: -0.04938895992106861
          total_loss: -0.05489880566795667
          vf_explained_var: -0.688154399394989
          vf_loss: 0.0006408261752868486
    num_agent_steps_sampled: 1454000
    num_agent_steps_trained: 1454000
    num_steps_sampled: 1454000
    num_steps_trained: 1454000
  iterations_since_restore: 1454
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1454,36265.4,1454000,-0.01,0,-1,332.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1455000
  custom_metrics: {}
  date: 2021-10-09_08-29-26
  done: false
  episode_len_mean: 331.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4076
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6122808403438993
          entropy_coeff: 0.009999999999999998
          kl: 0.008942249457387657
          policy_loss: -0.04639397809902827
          total_loss: -0.05016752945052253
          vf_explained_var: -0.7962985634803772
          vf_loss: 0.0007339178450315052
    num_agent_steps_sampled: 1455000
    num_agent_steps_trained: 1455000
    num_steps_sampled: 1455000
    num_steps_trained: 1455000
  iterations_since_restore: 145

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1455,36292.5,1455000,-0.01,0,-1,331.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1456000
  custom_metrics: {}
  date: 2021-10-09_08-29-52
  done: false
  episode_len_mean: 332.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4079
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6391081902715894
          entropy_coeff: 0.009999999999999998
          kl: 0.008832300249706807
          policy_loss: -0.07346774470061064
          total_loss: -0.07742448908587297
          vf_explained_var: -0.876325249671936
          vf_loss: 0.0009618134464188996
    num_agent_steps_sampled: 1456000
    num_agent_steps_trained: 1456000
    num_steps_sampled: 1456000
    num_steps_trained: 1456000
  iterations_since_restore: 1456

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1456,36318.6,1456000,-0.01,0,-1,332.52




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1457000
  custom_metrics: {}
  date: 2021-10-09_08-30-37
  done: false
  episode_len_mean: 332.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 4
  episodes_total: 4083
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6457498682869804
          entropy_coeff: 0.009999999999999998
          kl: 0.010345726442511192
          policy_loss: -0.08622137481967608
          total_loss: -0.08877453555663427
          vf_explained_var: -0.3068271279335022
          vf_loss: 0.00046597868373889165
    num_agent_steps_sampled: 1457000
    num_agent_steps_trained: 1457000
    num_steps_sampled: 1457000
    num_steps_trained: 1457000
  iterations_since_restore: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1457,36363.7,1457000,-0.01,0,-1,332.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1458000
  custom_metrics: {}
  date: 2021-10-09_08-31-03
  done: false
  episode_len_mean: 332.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4086
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4994753188557095
          entropy_coeff: 0.009999999999999998
          kl: 0.011676607021006329
          policy_loss: -0.11303667223287953
          total_loss: -0.11228046557969518
          vf_explained_var: -0.3607618510723114
          vf_loss: 0.0005838828958480412
    num_agent_steps_sampled: 1458000
    num_agent_steps_trained: 1458000
    num_steps_sampled: 1458000
    num_steps_trained: 1458000
  iterations_since_restore: 145

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1458,36389.9,1458000,-0.01,0,-1,332.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1459000
  custom_metrics: {}
  date: 2021-10-09_08-31-28
  done: false
  episode_len_mean: 332.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4089
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5081259065204196
          entropy_coeff: 0.009999999999999998
          kl: 0.00820215224688447
          policy_loss: -0.05346266441047191
          total_loss: -0.05747839994728565
          vf_explained_var: -1.0
          vf_loss: 0.0004115148259895957
    num_agent_steps_sampled: 1459000
    num_agent_steps_trained: 1459000
    num_steps_sampled: 1459000
    num_steps_trained: 1459000
  iterations_since_restore: 1459
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1459,36414.3,1459000,-0.01,0,-1,332.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1460000
  custom_metrics: {}
  date: 2021-10-09_08-31-51
  done: false
  episode_len_mean: 333.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 4091
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7821763396263122
          entropy_coeff: 0.009999999999999998
          kl: 0.009810227265867421
          policy_loss: -0.04635173035785556
          total_loss: -0.0509836013842788
          vf_explained_var: -0.942059338092804
          vf_loss: 0.00044710831021398513
    num_agent_steps_sampled: 1460000
    num_agent_steps_trained: 1460000
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
  iterations_since_restore: 1460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1460,36437.3,1460000,-0.01,0,-1,333.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1461000
  custom_metrics: {}
  date: 2021-10-09_08-32-15
  done: false
  episode_len_mean: 334.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4094
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.868396876917945
          entropy_coeff: 0.009999999999999998
          kl: 0.00870682124340946
          policy_loss: -0.07165078082018428
          total_loss: -0.07854586394710673
          vf_explained_var: -0.8433752059936523
          vf_loss: 0.00047934866403617586
    num_agent_steps_sampled: 1461000
    num_agent_steps_trained: 1461000
    num_steps_sampled: 1461000
    num_steps_trained: 1461000
  iterations_since_restore: 1461

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1461,36461.6,1461000,-0.01,0,-1,334.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1462000
  custom_metrics: {}
  date: 2021-10-09_08-32-39
  done: false
  episode_len_mean: 335.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4097
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7483526945114136
          entropy_coeff: 0.009999999999999998
          kl: 0.009295908407196771
          policy_loss: -0.04197710144023101
          total_loss: -0.04680582926505142
          vf_explained_var: -0.999703049659729
          vf_loss: 0.0005800831311110718
    num_agent_steps_sampled: 1462000
    num_agent_steps_trained: 1462000
    num_steps_sampled: 1462000
    num_steps_trained: 1462000
  iterations_since_restore: 1462

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1462,36485.3,1462000,-0.01,0,-1,335.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1463000
  custom_metrics: {}
  date: 2021-10-09_08-33-05
  done: false
  episode_len_mean: 333.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4100
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7348697847790189
          entropy_coeff: 0.009999999999999998
          kl: 0.00987285489576191
          policy_loss: -0.0687961700061957
          total_loss: -0.07310183182772663
          vf_explained_var: -0.41092219948768616
          vf_loss: 0.00021890543398007543
    num_agent_steps_sampled: 1463000
    num_agent_steps_trained: 1463000
    num_steps_sampled: 1463000
    num_steps_trained: 1463000
  iterations_since_restore: 146

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1463,36512.2,1463000,-0.01,0,-1,333.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1464000
  custom_metrics: {}
  date: 2021-10-09_08-33-31
  done: false
  episode_len_mean: 334.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 4
  episodes_total: 4104
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5688264462682935
          entropy_coeff: 0.009999999999999998
          kl: 0.008985046001651181
          policy_loss: -0.08827662699752384
          total_loss: -0.09200521550244756
          vf_explained_var: 0.4609561562538147
          vf_loss: 0.0002887487201304692
    num_agent_steps_sampled: 1464000
    num_agent_steps_trained: 1464000
    num_steps_sampled: 1464000
    num_steps_trained: 1464000
  iterations_since_restore: 1464

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1464,36537.9,1464000,-0.01,0,-1,334.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1465000
  custom_metrics: {}
  date: 2021-10-09_08-33-58
  done: false
  episode_len_mean: 333.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4107
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7577449533674452
          entropy_coeff: 0.009999999999999998
          kl: 0.00953036504653121
          policy_loss: -0.09743298167983691
          total_loss: -0.10243994096914927
          vf_explained_var: -0.6837280988693237
          vf_loss: 0.00019123030692248398
    num_agent_steps_sampled: 1465000
    num_agent_steps_trained: 1465000
    num_steps_sampled: 1465000
    num_steps_trained: 1465000
  iterations_since_restore: 146

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1465,36564.2,1465000,-0.01,0,-1,333.12




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1466000
  custom_metrics: {}
  date: 2021-10-09_08-34-39
  done: false
  episode_len_mean: 333.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4110
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6827758683098688
          entropy_coeff: 0.009999999999999998
          kl: 0.01189966129306599
          policy_loss: -0.044470254042082366
          total_loss: -0.04548592418432236
          vf_explained_var: -0.1202278584241867
          vf_loss: 0.0003552799017698918
    num_agent_steps_sampled: 1466000
    num_agent_steps_trained: 1466000
    num_steps_sampled: 1466000
    num_steps_trained: 1466000
  iterations_since_restore: 1466

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1466,36605.7,1466000,-0.01,0,-1,333.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1467000
  custom_metrics: {}
  date: 2021-10-09_08-35-05
  done: false
  episode_len_mean: 333.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4113
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7900124351183573
          entropy_coeff: 0.009999999999999998
          kl: 0.01282456508100199
          policy_loss: -0.11819928462306659
          total_loss: -0.11920130261116557
          vf_explained_var: -0.03939550742506981
          vf_loss: 0.0002399143387770487
    num_agent_steps_sampled: 1467000
    num_agent_steps_trained: 1467000
    num_steps_sampled: 1467000
    num_steps_trained: 1467000
  iterations_since_restore: 146

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1467,36631.9,1467000,-0.01,0,-1,333.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1468000
  custom_metrics: {}
  date: 2021-10-09_08-35-30
  done: false
  episode_len_mean: 334.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4116
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7215042008294
          entropy_coeff: 0.009999999999999998
          kl: 0.013111707820072718
          policy_loss: -0.13263176460233
          total_loss: -0.13257151519258817
          vf_explained_var: -0.39169490337371826
          vf_loss: 0.00024412265783111151
    num_agent_steps_sampled: 1468000
    num_agent_steps_trained: 1468000
    num_steps_sampled: 1468000
    num_steps_trained: 1468000
  iterations_since_restore: 1468
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1468,36656.4,1468000,-0.01,0,-1,334.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1469000
  custom_metrics: {}
  date: 2021-10-09_08-35-55
  done: false
  episode_len_mean: 334.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4119
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.809084826045566
          entropy_coeff: 0.009999999999999998
          kl: 0.010270400758227037
          policy_loss: -0.12455990732543998
          total_loss: -0.12912583963738547
          vf_explained_var: -0.9876881837844849
          vf_loss: 0.0001844029275364139
    num_agent_steps_sampled: 1469000
    num_agent_steps_trained: 1469000
    num_steps_sampled: 1469000
    num_steps_trained: 1469000
  iterations_since_restore: 1469

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1469,36681.7,1469000,-0.01,0,-1,334.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1470000
  custom_metrics: {}
  date: 2021-10-09_08-36-17
  done: false
  episode_len_mean: 336.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 4121
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6769088453716703
          entropy_coeff: 0.009999999999999998
          kl: 0.008416649770222131
          policy_loss: -0.06146142188873556
          total_loss: -0.06709468054274718
          vf_explained_var: -0.2710915207862854
          vf_loss: 0.00020320441690273582
    num_agent_steps_sampled: 1470000
    num_agent_steps_trained: 1470000
    num_steps_sampled: 1470000
    num_steps_trained: 1470000
  iterations_since_restore: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1470,36703.8,1470000,-0.01,0,-1,336.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1471000
  custom_metrics: {}
  date: 2021-10-09_08-36-39
  done: false
  episode_len_mean: 338.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4124
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7255055758688185
          entropy_coeff: 0.009999999999999998
          kl: 0.01027155443299945
          policy_loss: -0.08403626173320744
          total_loss: -0.08781713154166937
          vf_explained_var: -0.5820397138595581
          vf_loss: 0.00013217305907649765
    num_agent_steps_sampled: 1471000
    num_agent_steps_trained: 1471000
    num_steps_sampled: 1471000
    num_steps_trained: 1471000
  iterations_since_restore: 1471

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1471,36725.5,1471000,-0.01,0,-1,338.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1472000
  custom_metrics: {}
  date: 2021-10-09_08-37-07
  done: false
  episode_len_mean: 338.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4127
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7924624231126574
          entropy_coeff: 0.009999999999999998
          kl: 0.00896561138600131
          policy_loss: -0.09426252705355485
          total_loss: -0.10039267713824908
          vf_explained_var: -0.3765106499195099
          vf_loss: 0.00014878935184646657
    num_agent_steps_sampled: 1472000
    num_agent_steps_trained: 1472000
    num_steps_sampled: 1472000
    num_steps_trained: 1472000
  iterations_since_restore: 1472

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1472,36753.2,1472000,-0.01,0,-1,338.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1473000
  custom_metrics: {}
  date: 2021-10-09_08-37-32
  done: false
  episode_len_mean: 337.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4130
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4300750043657091
          entropy_coeff: 0.009999999999999998
          kl: 0.010027707268190432
          policy_loss: -0.05204674932691786
          total_loss: -0.05312572038835949
          vf_explained_var: 0.3406122028827667
          vf_loss: 0.000196502594836703
    num_agent_steps_sampled: 1473000
    num_agent_steps_trained: 1473000
    num_steps_sampled: 1473000
    num_steps_trained: 1473000
  iterations_since_restore: 1473


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1473,36778.2,1473000,-0.01,0,-1,337.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1474000
  custom_metrics: {}
  date: 2021-10-09_08-37-55
  done: false
  episode_len_mean: 338.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4133
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5769129925303988
          entropy_coeff: 0.009999999999999998
          kl: 0.010417717857220123
          policy_loss: -0.14156333021819592
          total_loss: -0.1435529075562954
          vf_explained_var: -0.004641511477530003
          vf_loss: 0.0002476854274088206
    num_agent_steps_sampled: 1474000
    num_agent_steps_trained: 1474000
    num_steps_sampled: 1474000
    num_steps_trained: 1474000
  iterations_since_restore: 1474

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1474,36801.4,1474000,-0.01,0,-1,338.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1475000
  custom_metrics: {}
  date: 2021-10-09_08-38-20
  done: false
  episode_len_mean: 338.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4136
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.638892083697849
          entropy_coeff: 0.009999999999999998
          kl: 0.013634610130442561
          policy_loss: -0.09390472852521473
          total_loss: -0.09244179425554143
          vf_explained_var: 0.12497341632843018
          vf_loss: 0.000141473861731356
    num_agent_steps_sampled: 1475000
    num_agent_steps_trained: 1475000
    num_steps_sampled: 1475000
    num_steps_trained: 1475000
  iterations_since_restore: 1475


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1475,36826.9,1475000,-0.01,0,-1,338.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1476000
  custom_metrics: {}
  date: 2021-10-09_08-38-46
  done: false
  episode_len_mean: 335.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4139
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7201299746831258
          entropy_coeff: 0.009999999999999998
          kl: 0.01061718169630488
          policy_loss: -0.12430343565841516
          total_loss: -0.12764229654437967
          vf_explained_var: -0.4792430102825165
          vf_loss: 7.148027357794086e-05
    num_agent_steps_sampled: 1476000
    num_agent_steps_trained: 1476000
    num_steps_sampled: 1476000
    num_steps_trained: 1476000
  iterations_since_restore: 1476

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1476,36852.8,1476000,-0.01,0,-1,335.63




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1477000
  custom_metrics: {}
  date: 2021-10-09_08-39-29
  done: false
  episode_len_mean: 334.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 4
  episodes_total: 4143
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4857571005821228
          entropy_coeff: 0.009999999999999998
          kl: 0.014773615122008366
          policy_loss: -0.013092009060912662
          total_loss: -0.008509395933813519
          vf_explained_var: 0.4060278534889221
          vf_loss: 0.00025031418222675307
    num_agent_steps_sampled: 1477000
    num_agent_steps_trained: 1477000
    num_steps_sampled: 1477000
    num_steps_trained: 1477000
  iterations_since_restore: 1477

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1477,36895.6,1477000,-0.01,0,-1,334.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1478000
  custom_metrics: {}
  date: 2021-10-09_08-39-51
  done: false
  episode_len_mean: 332.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 4145
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7227234151628283
          entropy_coeff: 0.009999999999999998
          kl: 0.010099097639803666
          policy_loss: -0.13571286300818125
          total_loss: -0.139713744363851
          vf_explained_var: -0.05601467937231064
          vf_loss: 0.00010834844191170608
    num_agent_steps_sampled: 1478000
    num_agent_steps_trained: 1478000
    num_steps_sampled: 1478000
    num_steps_trained: 1478000
  iterations_since_restore: 1478

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1478,36917.3,1478000,-0.01,0,-1,332.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1479000
  custom_metrics: {}
  date: 2021-10-09_08-40-17
  done: false
  episode_len_mean: 332.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4148
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5812116132842169
          entropy_coeff: 0.009999999999999998
          kl: 0.010709601176543846
          policy_loss: -0.08219576142728328
          total_loss: -0.08400249903400739
          vf_explained_var: -0.6974713206291199
          vf_loss: 9.437496054387238e-05
    num_agent_steps_sampled: 1479000
    num_agent_steps_trained: 1479000
    num_steps_sampled: 1479000
    num_steps_trained: 1479000
  iterations_since_restore: 1479

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1479,36943.2,1479000,-0.01,0,-1,332.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1480000
  custom_metrics: {}
  date: 2021-10-09_08-40-41
  done: false
  episode_len_mean: 332.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4151
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7084782163302104
          entropy_coeff: 0.009999999999999998
          kl: 0.010252146405469582
          policy_loss: -0.10617525376793412
          total_loss: -0.10985497281783158
          vf_explained_var: -0.5710734724998474
          vf_loss: 8.82596419816966e-05
    num_agent_steps_sampled: 1480000
    num_agent_steps_trained: 1480000
    num_steps_sampled: 1480000
    num_steps_trained: 1480000
  iterations_since_restore: 1480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1480,36967.5,1480000,-0.01,0,-1,332.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1481000
  custom_metrics: {}
  date: 2021-10-09_08-41-08
  done: false
  episode_len_mean: 330.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4154
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5147238810857138
          entropy_coeff: 0.009999999999999998
          kl: 0.007009287558838848
          policy_loss: -0.061397378684745894
          total_loss: -0.06736704061428705
          vf_explained_var: 0.024505162611603737
          vf_loss: 7.30124590821409e-05
    num_agent_steps_sampled: 1481000
    num_agent_steps_trained: 1481000
    num_steps_sampled: 1481000
    num_steps_trained: 1481000
  iterations_since_restore: 1481

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1481,36994.4,1481000,-0.01,0,-1,330.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1482000
  custom_metrics: {}
  date: 2021-10-09_08-41-32
  done: false
  episode_len_mean: 331.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 4157
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6850862834188673
          entropy_coeff: 0.009999999999999998
          kl: 0.008870980146144486
          policy_loss: -0.03714094244771533
          total_loss: -0.04240149752133422
          vf_explained_var: -0.7426727414131165
          vf_loss: 6.753986910755177e-05
    num_agent_steps_sampled: 1482000
    num_agent_steps_trained: 1482000
    num_steps_sampled: 1482000
    num_steps_trained: 1482000
  iterations_since_restore: 1482

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1482,37018.6,1482000,-0.01,0,-1,331.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1483000
  custom_metrics: {}
  date: 2021-10-09_08-42-01
  done: false
  episode_len_mean: 330.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4161
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4966637214024863
          entropy_coeff: 0.009999999999999998
          kl: 0.009018558919901467
          policy_loss: -0.09010658715334204
          total_loss: -0.09327754452824592
          vf_explained_var: -0.4412907063961029
          vf_loss: 8.121944425106953e-05
    num_agent_steps_sampled: 1483000
    num_agent_steps_trained: 1483000
    num_steps_sampled: 1483000
    num_steps_trained: 1483000
  iterations_since_restore: 1483
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1483,37046.9,1483000,0,0,0,330.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1484000
  custom_metrics: {}
  date: 2021-10-09_08-42-25
  done: false
  episode_len_mean: 331.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4163
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4783567216661242
          entropy_coeff: 0.009999999999999998
          kl: 0.007277111968162428
          policy_loss: -0.034093799421356784
          total_loss: -0.03933580219745636
          vf_explained_var: -0.011333851143717766
          vf_loss: 8.911549476680294e-05
    num_agent_steps_sampled: 1484000
    num_agent_steps_trained: 1484000
    num_steps_sampled: 1484000
    num_steps_trained: 1484000
  iterations_since_restore: 1484

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1484,37071.1,1484000,0,0,0,331.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1485000
  custom_metrics: {}
  date: 2021-10-09_08-42-50
  done: false
  episode_len_mean: 331.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4166
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.582940071158939
          entropy_coeff: 0.009999999999999998
          kl: 0.012590811442386496
          policy_loss: -0.08824415227605237
          total_loss: -0.08763336793829997
          vf_explained_var: -0.3314242362976074
          vf_loss: 8.5624089964161e-05
    num_agent_steps_sampled: 1485000
    num_agent_steps_trained: 1485000
    num_steps_sampled: 1485000
    num_steps_trained: 1485000
  iterations_since_restore: 1485
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1485,37096,1485000,0,0,0,331.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1486000
  custom_metrics: {}
  date: 2021-10-09_08-43-11
  done: false
  episode_len_mean: 334.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4169
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.2405022535059187
          entropy_coeff: 0.009999999999999998
          kl: 0.008107410024346014
          policy_loss: -0.049981607662306894
          total_loss: -0.05174891274008486
          vf_explained_var: -0.005743614863604307
          vf_loss: 0.00010676972605223353
    num_agent_steps_sampled: 1486000
    num_agent_steps_trained: 1486000
    num_steps_sampled: 1486000
    num_steps_trained: 1486000
  iterations_since_restore: 1486

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1486,37117.1,1486000,0,0,0,334.29




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1487000
  custom_metrics: {}
  date: 2021-10-09_08-43-54
  done: false
  episode_len_mean: 332.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4172
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7416608452796936
          entropy_coeff: 0.009999999999999998
          kl: 0.011640571209030762
          policy_loss: -0.1200124439679914
          total_loss: -0.12222326969106992
          vf_explained_var: -0.39321646094322205
          vf_loss: 8.55160846387864e-05
    num_agent_steps_sampled: 1487000
    num_agent_steps_trained: 1487000
    num_steps_sampled: 1487000
    num_steps_trained: 1487000
  iterations_since_restore: 1487
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1487,37160.1,1487000,0,0,0,332.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1488000
  custom_metrics: {}
  date: 2021-10-09_08-44-18
  done: false
  episode_len_mean: 333.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4175
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7489879727363586
          entropy_coeff: 0.009999999999999998
          kl: 0.010123207614546804
          policy_loss: -0.09409173139267497
          total_loss: -0.09835518557164404
          vf_explained_var: -0.19005464017391205
          vf_loss: 7.710600843387914e-05
    num_agent_steps_sampled: 1488000
    num_agent_steps_trained: 1488000
    num_steps_sampled: 1488000
    num_steps_trained: 1488000
  iterations_since_restore: 1488


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1488,37184.1,1488000,0,0,0,333.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1489000
  custom_metrics: {}
  date: 2021-10-09_08-44-42
  done: false
  episode_len_mean: 334.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4178
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5626537548171149
          entropy_coeff: 0.009999999999999998
          kl: 0.010208877191214057
          policy_loss: -0.06768189122279485
          total_loss: -0.0699865176445908
          vf_explained_var: -0.8751036524772644
          vf_loss: 6.130617109900211e-05
    num_agent_steps_sampled: 1489000
    num_agent_steps_trained: 1489000
    num_steps_sampled: 1489000
    num_steps_trained: 1489000
  iterations_since_restore: 1489
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1489,37207.8,1489000,0,0,0,334.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1490000
  custom_metrics: {}
  date: 2021-10-09_08-45-09
  done: false
  episode_len_mean: 334.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4181
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.615140551990933
          entropy_coeff: 0.009999999999999998
          kl: 0.01098407795844949
          policy_loss: -0.039261697149939004
          total_loss: -0.04088318778408898
          vf_explained_var: -0.22917716205120087
          vf_loss: 0.00026238553519861954
    num_agent_steps_sampled: 1490000
    num_agent_steps_trained: 1490000
    num_steps_sampled: 1490000
    num_steps_trained: 1490000
  iterations_since_restore: 1490
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1490,37234.7,1490000,0,0,0,334.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1491000
  custom_metrics: {}
  date: 2021-10-09_08-45-34
  done: false
  episode_len_mean: 334.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4184
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7853387607468498
          entropy_coeff: 0.009999999999999998
          kl: 0.008312369875713908
          policy_loss: -0.017964217935999234
          total_loss: -0.024942986791332562
          vf_explained_var: -0.7259383201599121
          vf_loss: 7.744515674453901e-05
    num_agent_steps_sampled: 1491000
    num_agent_steps_trained: 1491000
    num_steps_sampled: 1491000
    num_steps_trained: 1491000
  iterations_since_restore: 1491

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1491,37260.4,1491000,0,0,0,334.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1492000
  custom_metrics: {}
  date: 2021-10-09_08-46-01
  done: false
  episode_len_mean: 334.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4188
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4929809861712986
          entropy_coeff: 0.009999999999999998
          kl: 0.009429132364082745
          policy_loss: -0.05031214977304141
          total_loss: -0.05294492617249489
          vf_explained_var: -0.44825994968414307
          vf_loss: 4.926671327767609e-05
    num_agent_steps_sampled: 1492000
    num_agent_steps_trained: 1492000
    num_steps_sampled: 1492000
    num_steps_trained: 1492000
  iterations_since_restore: 1492


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1492,37287.4,1492000,0,0,0,334.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1493000
  custom_metrics: {}
  date: 2021-10-09_08-46-24
  done: false
  episode_len_mean: 334.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4190
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7983208431137934
          entropy_coeff: 0.009999999999999998
          kl: 0.010608018483865144
          policy_loss: -0.06721259293456873
          total_loss: -0.07136551058954663
          vf_explained_var: -0.19767025113105774
          vf_loss: 5.123451171837385e-05
    num_agent_steps_sampled: 1493000
    num_agent_steps_trained: 1493000
    num_steps_sampled: 1493000
    num_steps_trained: 1493000
  iterations_since_restore: 1493


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1493,37309.8,1493000,0,0,0,334.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1494000
  custom_metrics: {}
  date: 2021-10-09_08-46-52
  done: false
  episode_len_mean: 332.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4194
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4815472390916613
          entropy_coeff: 0.009999999999999998
          kl: 0.00992756297422621
          policy_loss: -0.06858476400375366
          total_loss: -0.0704692063232263
          vf_explained_var: -0.383063942193985
          vf_loss: 3.5836669222691044e-05
    num_agent_steps_sampled: 1494000
    num_agent_steps_trained: 1494000
    num_steps_sampled: 1494000
    num_steps_trained: 1494000
  iterations_since_restore: 1494
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1494,37338.5,1494000,0,0,0,332.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1495000
  custom_metrics: {}
  date: 2021-10-09_08-47-16
  done: false
  episode_len_mean: 331.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4197
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5006954259342618
          entropy_coeff: 0.009999999999999998
          kl: 0.00947952596661068
          policy_loss: -0.05893822891844643
          total_loss: -0.06156022294114034
          vf_explained_var: -0.4550195038318634
          vf_loss: 7.173735357355327e-05
    num_agent_steps_sampled: 1495000
    num_agent_steps_trained: 1495000
    num_steps_sampled: 1495000
    num_steps_trained: 1495000
  iterations_since_restore: 1495
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1495,37362.1,1495000,0,0,0,331.61




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1496000
  custom_metrics: {}
  date: 2021-10-09_08-47-59
  done: false
  episode_len_mean: 331.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4200
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.580770817067888
          entropy_coeff: 0.009999999999999998
          kl: 0.010454310385769045
          policy_loss: -0.14474256179398962
          total_loss: -0.14693936407566072
          vf_explained_var: 0.29837068915367126
          vf_loss: 3.1507282013384e-05
    num_agent_steps_sampled: 1496000
    num_agent_steps_trained: 1496000
    num_steps_sampled: 1496000
    num_steps_trained: 1496000
  iterations_since_restore: 1496
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1496,37405.2,1496000,0,0,0,331.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1497000
  custom_metrics: {}
  date: 2021-10-09_08-48-28
  done: false
  episode_len_mean: 329.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4204
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.3998013668590121
          entropy_coeff: 0.009999999999999998
          kl: 0.007791119103891633
          policy_loss: -0.09474559556692838
          total_loss: -0.09858370102528069
          vf_explained_var: 0.2883699834346771
          vf_loss: 3.980046696799238e-05
    num_agent_steps_sampled: 1497000
    num_agent_steps_trained: 1497000
    num_steps_sampled: 1497000
    num_steps_trained: 1497000
  iterations_since_restore: 1497
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1497,37434.2,1497000,0,0,0,329.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1498000
  custom_metrics: {}
  date: 2021-10-09_08-48-56
  done: false
  episode_len_mean: 329.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4207
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6091029405593873
          entropy_coeff: 0.009999999999999998
          kl: 0.009084149015431977
          policy_loss: -0.04690084904432297
          total_loss: -0.051155839011900954
          vf_explained_var: 0.31553786993026733
          vf_loss: 3.6382870156457e-05
    num_agent_steps_sampled: 1498000
    num_agent_steps_trained: 1498000
    num_steps_sampled: 1498000
    num_steps_trained: 1498000
  iterations_since_restore: 1498
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1498,37462.4,1498000,0,0,0,329.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1499000
  custom_metrics: {}
  date: 2021-10-09_08-49-24
  done: false
  episode_len_mean: 327.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4210
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.55052895810869
          entropy_coeff: 0.009999999999999998
          kl: 0.01145139351185754
          policy_loss: -0.017358404439356594
          total_loss: -0.017945393299063046
          vf_explained_var: -0.3720801770687103
          vf_loss: 4.3761414579219285e-05
    num_agent_steps_sampled: 1499000
    num_agent_steps_trained: 1499000
    num_steps_sampled: 1499000
    num_steps_trained: 1499000
  iterations_since_restore: 1499
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1499,37490.1,1499000,0,0,0,327.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1500000
  custom_metrics: {}
  date: 2021-10-09_08-49-49
  done: false
  episode_len_mean: 327.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4213
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6353022694587707
          entropy_coeff: 0.009999999999999998
          kl: 0.008802610722954875
          policy_loss: -0.04703109744522307
          total_loss: -0.05191097255382273
          vf_explained_var: -0.5191753506660461
          vf_loss: 3.9185177552604325e-05
    num_agent_steps_sampled: 1500000
    num_agent_steps_trained: 1500000
    num_steps_sampled: 1500000
    num_steps_trained: 1500000
  iterations_since_restore: 1500


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1500,37514.8,1500000,0,0,0,327.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1501000
  custom_metrics: {}
  date: 2021-10-09_08-50-15
  done: false
  episode_len_mean: 325.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4217
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5824304646915859
          entropy_coeff: 0.009999999999999998
          kl: 0.008002301534981429
          policy_loss: -0.0631983608007431
          total_loss: -0.06859166936741935
          vf_explained_var: -0.3070535957813263
          vf_loss: 3.6578331451487934e-05
    num_agent_steps_sampled: 1501000
    num_agent_steps_trained: 1501000
    num_steps_sampled: 1501000
    num_steps_trained: 1501000
  iterations_since_restore: 1501
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1501,37541,1501000,0,0,0,325.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1502000
  custom_metrics: {}
  date: 2021-10-09_08-50-43
  done: false
  episode_len_mean: 325.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4220
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.578261313173506
          entropy_coeff: 0.009999999999999998
          kl: 0.013366484235296797
          policy_loss: -0.043203902720577184
          total_loss: -0.04155007888459497
          vf_explained_var: -0.05982930213212967
          vf_loss: 7.433260331405715e-05
    num_agent_steps_sampled: 1502000
    num_agent_steps_trained: 1502000
    num_steps_sampled: 1502000
    num_steps_trained: 1502000
  iterations_since_restore: 1502


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1502,37568.9,1502000,0,0,0,325.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1503000
  custom_metrics: {}
  date: 2021-10-09_08-51-11
  done: false
  episode_len_mean: 321.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4224
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.3757157186667124
          entropy_coeff: 0.009999999999999998
          kl: 0.008659971465810736
          policy_loss: -0.058434479683637616
          total_loss: -0.060910561308264735
          vf_explained_var: -0.837902307510376
          vf_loss: 3.23929176981134e-05
    num_agent_steps_sampled: 1503000
    num_agent_steps_trained: 1503000
    num_steps_sampled: 1503000
    num_steps_trained: 1503000
  iterations_since_restore: 1503
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1503,37597,1503000,0,0,0,321.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1504000
  custom_metrics: {}
  date: 2021-10-09_08-51-41
  done: false
  episode_len_mean: 319.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4227
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.2105491406387754
          entropy_coeff: 0.009999999999999998
          kl: 0.006056148411157617
          policy_loss: -0.05514873382118013
          total_loss: -0.05843558799889353
          vf_explained_var: 0.4139520525932312
          vf_loss: 0.0009521335587325869
    num_agent_steps_sampled: 1504000
    num_agent_steps_trained: 1504000
    num_steps_sampled: 1504000
    num_steps_trained: 1504000
  iterations_since_restore: 1504
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1504,37626.6,1504000,0,0,0,319.98




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1505000
  custom_metrics: {}
  date: 2021-10-09_08-52-25
  done: false
  episode_len_mean: 318.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4231
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.531882099310557
          entropy_coeff: 0.009999999999999998
          kl: 0.008753763541302383
          policy_loss: -0.09276131050040325
          total_loss: -0.09627585839480161
          vf_explained_var: -0.26120424270629883
          vf_loss: 0.0004337621777570651
    num_agent_steps_sampled: 1505000
    num_agent_steps_trained: 1505000
    num_steps_sampled: 1505000
    num_steps_trained: 1505000
  iterations_since_restore: 1505
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1505,37671.3,1505000,0,0,0,318.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1506000
  custom_metrics: {}
  date: 2021-10-09_08-52-55
  done: false
  episode_len_mean: 316.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4234
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4754972100257873
          entropy_coeff: 0.009999999999999998
          kl: 0.009073379032798338
          policy_loss: -0.07622707786245478
          total_loss: -0.07911669940998158
          vf_explained_var: -0.24312525987625122
          vf_loss: 7.968293866724707e-05
    num_agent_steps_sampled: 1506000
    num_agent_steps_trained: 1506000
    num_steps_sampled: 1506000
    num_steps_trained: 1506000
  iterations_since_restore: 1506


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1506,37701.1,1506000,0,0,0,316.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1507000
  custom_metrics: {}
  date: 2021-10-09_08-53-22
  done: false
  episode_len_mean: 316.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4238
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4467872513665094
          entropy_coeff: 0.009999999999999998
          kl: 0.010765254852444884
          policy_loss: -0.07626682213611073
          total_loss: -0.07669438823229736
          vf_explained_var: 0.072455994784832
          vf_loss: 5.700930182178531e-05
    num_agent_steps_sampled: 1507000
    num_agent_steps_trained: 1507000
    num_steps_sampled: 1507000
    num_steps_trained: 1507000
  iterations_since_restore: 1507
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1507,37727.8,1507000,0,0,0,316.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1508000
  custom_metrics: {}
  date: 2021-10-09_08-53-48
  done: false
  episode_len_mean: 315.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4241
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7266103519333733
          entropy_coeff: 0.009999999999999998
          kl: 0.007509788620063615
          policy_loss: -0.05582946120347414
          total_loss: -0.06330530739699801
          vf_explained_var: -1.0
          vf_loss: 3.55772746439066e-05
    num_agent_steps_sampled: 1508000
    num_agent_steps_trained: 1508000
    num_steps_sampled: 1508000
    num_steps_trained: 1508000
  iterations_since_restore: 1508
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1508,37753.9,1508000,0,0,0,315.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1509000
  custom_metrics: {}
  date: 2021-10-09_08-54-15
  done: false
  episode_len_mean: 315.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4244
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6475022474924723
          entropy_coeff: 0.009999999999999998
          kl: 0.010284399284678218
          policy_loss: -0.12316237334161997
          total_loss: -0.12621635893980662
          vf_explained_var: -0.22333522140979767
          vf_loss: 6.234093440677194e-05
    num_agent_steps_sampled: 1509000
    num_agent_steps_trained: 1509000
    num_steps_sampled: 1509000
    num_steps_trained: 1509000
  iterations_since_restore: 1509


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1509,37780.4,1509000,0,0,0,315.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1510000
  custom_metrics: {}
  date: 2021-10-09_08-54-38
  done: false
  episode_len_mean: 314.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4247
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7494873086611429
          entropy_coeff: 0.009999999999999998
          kl: 0.008592326980922493
          policy_loss: -0.024908188978830972
          total_loss: -0.031191880359417862
          vf_explained_var: -0.3050609529018402
          vf_loss: 5.036682573012917e-05
    num_agent_steps_sampled: 1510000
    num_agent_steps_trained: 1510000
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
  iterations_since_restore: 1510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1510,37803.3,1510000,0,0,0,314.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1511000
  custom_metrics: {}
  date: 2021-10-09_08-55-03
  done: false
  episode_len_mean: 314.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4250
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.595625642935435
          entropy_coeff: 0.009999999999999998
          kl: 0.005486652611285869
          policy_loss: -0.06899172376013464
          total_loss: -0.07776366769232684
          vf_explained_var: -0.5319672226905823
          vf_loss: 5.754563883076318e-05
    num_agent_steps_sampled: 1511000
    num_agent_steps_trained: 1511000
    num_steps_sampled: 1511000
    num_steps_trained: 1511000
  iterations_since_restore: 1511
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1511,37829.1,1511000,0,0,0,314.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1512000
  custom_metrics: {}
  date: 2021-10-09_08-55-27
  done: false
  episode_len_mean: 315.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4252
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.495347139570448
          entropy_coeff: 0.009999999999999998
          kl: 0.008660244338163241
          policy_loss: -0.11665687445137236
          total_loss: -0.12031985148787498
          vf_explained_var: -0.9235700964927673
          vf_loss: 4.1456148412382594e-05
    num_agent_steps_sampled: 1512000
    num_agent_steps_trained: 1512000
    num_steps_sampled: 1512000
    num_steps_trained: 1512000
  iterations_since_restore: 1512
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1512,37852.5,1512000,0,0,0,315.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1513000
  custom_metrics: {}
  date: 2021-10-09_08-55-54
  done: false
  episode_len_mean: 315.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4256
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.652831146452162
          entropy_coeff: 0.009999999999999998
          kl: 0.008505426752250363
          policy_loss: -0.07005770657625464
          total_loss: -0.07550726955135663
          vf_explained_var: -0.3391067385673523
          vf_loss: 3.080543143975471e-05
    num_agent_steps_sampled: 1513000
    num_agent_steps_trained: 1513000
    num_steps_sampled: 1513000
    num_steps_trained: 1513000
  iterations_since_restore: 1513
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1513,37879.5,1513000,0,0,0,315.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1514000
  custom_metrics: {}
  date: 2021-10-09_08-56-20
  done: false
  episode_len_mean: 314.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4259
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7878158158726163
          entropy_coeff: 0.009999999999999998
          kl: 0.012342313392245865
          policy_loss: -0.1022701609465811
          total_loss: -0.10408996968633599
          vf_explained_var: -0.7916617393493652
          vf_loss: 2.6568211604980104e-05
    num_agent_steps_sampled: 1514000
    num_agent_steps_trained: 1514000
    num_steps_sampled: 1514000
    num_steps_trained: 1514000
  iterations_since_restore: 1514
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1514,37905.8,1514000,0,0,0,314.97




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1515000
  custom_metrics: {}
  date: 2021-10-09_08-57-04
  done: false
  episode_len_mean: 315.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4262
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5869216667281256
          entropy_coeff: 0.009999999999999998
          kl: 0.00998303938976556
          policy_loss: -0.04717844215532144
          total_loss: -0.05005022461215655
          vf_explained_var: -0.6396504044532776
          vf_loss: 3.0181297228813896e-05
    num_agent_steps_sampled: 1515000
    num_agent_steps_trained: 1515000
    num_steps_sampled: 1515000
    num_steps_trained: 1515000
  iterations_since_restore: 1515
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1515,37949.9,1515000,0,0,0,315.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1516000
  custom_metrics: {}
  date: 2021-10-09_08-57-31
  done: false
  episode_len_mean: 313.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4266
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.556583329041799
          entropy_coeff: 0.009999999999999998
          kl: 0.010986134335101304
          policy_loss: -0.06760121112068494
          total_loss: -0.06885564070608881
          vf_explained_var: 0.48014578223228455
          vf_loss: 4.120105224703568e-05
    num_agent_steps_sampled: 1516000
    num_agent_steps_trained: 1516000
    num_steps_sampled: 1516000
    num_steps_trained: 1516000
  iterations_since_restore: 1516
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1516,37977,1516000,0,0,0,313.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1517000
  custom_metrics: {}
  date: 2021-10-09_08-57-58
  done: false
  episode_len_mean: 310.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4269
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7544961439238653
          entropy_coeff: 0.009999999999999998
          kl: 0.009044634292637197
          policy_loss: -0.11050746084915267
          total_loss: -0.11627193113995923
          vf_explained_var: -0.3493092358112335
          vf_loss: 3.215856222595903e-05
    num_agent_steps_sampled: 1517000
    num_agent_steps_trained: 1517000
    num_steps_sampled: 1517000
    num_steps_trained: 1517000
  iterations_since_restore: 1517
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1517,38003.2,1517000,0,0,0,310.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1518000
  custom_metrics: {}
  date: 2021-10-09_08-58-23
  done: false
  episode_len_mean: 310.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4272
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6838080101543003
          entropy_coeff: 0.009999999999999998
          kl: 0.010307699503894697
          policy_loss: -0.08570196156700452
          total_loss: -0.08912335874305831
          vf_explained_var: -0.3686670958995819
          vf_loss: 2.772218337364999e-05
    num_agent_steps_sampled: 1518000
    num_agent_steps_trained: 1518000
    num_steps_sampled: 1518000
    num_steps_trained: 1518000
  iterations_since_restore: 1518
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1518,38028.9,1518000,0,0,0,310.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1519000
  custom_metrics: {}
  date: 2021-10-09_08-58-49
  done: false
  episode_len_mean: 310.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4275
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5377369085947672
          entropy_coeff: 0.009999999999999998
          kl: 0.006665767955747665
          policy_loss: -0.015242577582183812
          total_loss: -0.02194880089826054
          vf_explained_var: 0.21529437601566315
          vf_loss: 1.2790397930631217e-05
    num_agent_steps_sampled: 1519000
    num_agent_steps_trained: 1519000
    num_steps_sampled: 1519000
    num_steps_trained: 1519000
  iterations_since_restore: 1519

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1519,38054.8,1519000,0,0,0,310.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1520000
  custom_metrics: {}
  date: 2021-10-09_08-59-15
  done: false
  episode_len_mean: 309.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4278
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6909389628304377
          entropy_coeff: 0.009999999999999998
          kl: 0.014180145446602666
          policy_loss: -0.06526268089397086
          total_loss: -0.06372017626547151
          vf_explained_var: -0.27443450689315796
          vf_loss: 3.2900667550469126e-05
    num_agent_steps_sampled: 1520000
    num_agent_steps_trained: 1520000
    num_steps_sampled: 1520000
    num_steps_trained: 1520000
  iterations_since_restore: 1520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1520,38080.2,1520000,0,0,0,309.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1521000
  custom_metrics: {}
  date: 2021-10-09_08-59-41
  done: false
  episode_len_mean: 310.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4281
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6862640963660347
          entropy_coeff: 0.009999999999999998
          kl: 0.010653209074336327
          policy_loss: -0.05786530232677857
          total_loss: -0.06085486011579633
          vf_explained_var: -0.10718094557523727
          vf_loss: 3.532698311674823e-05
    num_agent_steps_sampled: 1521000
    num_agent_steps_trained: 1521000
    num_steps_sampled: 1521000
    num_steps_trained: 1521000
  iterations_since_restore: 1521


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1521,38106.4,1521000,0,0,0,310.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1522000
  custom_metrics: {}
  date: 2021-10-09_09-00-07
  done: false
  episode_len_mean: 309.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4284
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8506890575091044
          entropy_coeff: 0.009999999999999998
          kl: 0.011987124226609058
          policy_loss: -0.02688844766881731
          total_loss: -0.02980915399061309
          vf_explained_var: 0.06426794081926346
          vf_loss: 1.576695526637195e-05
    num_agent_steps_sampled: 1522000
    num_agent_steps_trained: 1522000
    num_steps_sampled: 1522000
    num_steps_trained: 1522000
  iterations_since_restore: 1522
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1522,38132.9,1522000,0,0,0,309.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1523000
  custom_metrics: {}
  date: 2021-10-09_09-00-32
  done: false
  episode_len_mean: 310.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4287
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.651899642414517
          entropy_coeff: 0.009999999999999998
          kl: 0.009440599354295465
          policy_loss: 0.01739705502986908
          total_loss: 0.061438256171014574
          vf_explained_var: -0.40822678804397583
          vf_loss: 0.04829753221484781
    num_agent_steps_sampled: 1523000
    num_agent_steps_trained: 1523000
    num_steps_sampled: 1523000
    num_steps_trained: 1523000
  iterations_since_restore: 1523
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1523,38157.4,1523000,-0.05,0,-5,310.72




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1524000
  custom_metrics: {}
  date: 2021-10-09_09-01-16
  done: false
  episode_len_mean: 309.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 4
  episodes_total: 4291
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5047031071450976
          entropy_coeff: 0.009999999999999998
          kl: 0.008876175341297414
          policy_loss: -0.06989436662859387
          total_loss: -0.0681623181535138
          vf_explained_var: 0.051253728568553925
          vf_loss: 0.005249567262621389
    num_agent_steps_sampled: 1524000
    num_agent_steps_trained: 1524000
    num_steps_sampled: 1524000
    num_steps_trained: 1524000
  iterations_since_restore: 1524


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1524,38201.6,1524000,-0.05,0,-5,309.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1525000
  custom_metrics: {}
  date: 2021-10-09_09-01-43
  done: false
  episode_len_mean: 310.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4294
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6676385627852546
          entropy_coeff: 0.009999999999999998
          kl: 0.010464544607604707
          policy_loss: -0.054484406196408805
          total_loss: -0.05435868302981059
          vf_explained_var: 0.07202967256307602
          vf_loss: 0.0032094139062489075
    num_agent_steps_sampled: 1525000
    num_agent_steps_trained: 1525000
    num_steps_sampled: 1525000
    num_steps_trained: 1525000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1525,38228.2,1525000,-0.05,0,-5,310.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1526000
  custom_metrics: {}
  date: 2021-10-09_09-02-08
  done: false
  episode_len_mean: 309.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4297
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.745977489153544
          entropy_coeff: 0.009999999999999998
          kl: 0.0101460392540567
          policy_loss: -0.08456612061709165
          total_loss: -0.08683336298498842
          vf_explained_var: -0.21785862743854523
          vf_loss: 0.0020135508087049756
    num_agent_steps_sampled: 1526000
    num_agent_steps_trained: 1526000
    num_steps_sampled: 1526000
    num_steps_trained: 1526000
  iterations_since_restore: 1526


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1526,38253,1526000,-0.05,0,-5,309.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1527000
  custom_metrics: {}
  date: 2021-10-09_09-02-29
  done: false
  episode_len_mean: 310.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 4299
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7164961059888204
          entropy_coeff: 0.009999999999999998
          kl: 0.009568815227643333
          policy_loss: -0.09255789799822702
          total_loss: -0.09583475929167536
          vf_explained_var: -0.45232751965522766
          vf_loss: 0.0014588972023274335
    num_agent_steps_sampled: 1527000
    num_agent_steps_trained: 1527000
    num_steps_sampled: 1527000
    num_steps_trained: 1527000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1527,38274.3,1527000,-0.05,0,-5,310.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1528000
  custom_metrics: {}
  date: 2021-10-09_09-02-56
  done: false
  episode_len_mean: 312.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 4
  episodes_total: 4303
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5480185177591113
          entropy_coeff: 0.009999999999999998
          kl: 0.0111503833963826
          policy_loss: -0.09328010962862107
          total_loss: -0.09332815344548888
          vf_explained_var: -0.15056051313877106
          vf_loss: 0.0009485893572370211
    num_agent_steps_sampled: 1528000
    num_agent_steps_trained: 1528000
    num_steps_sampled: 1528000
    num_steps_trained: 1528000
  iterations_since_restore: 1528

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1528,38301.1,1528000,-0.05,0,-5,312.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1529000
  custom_metrics: {}
  date: 2021-10-09_09-03-21
  done: false
  episode_len_mean: 313.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4306
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4950223353173997
          entropy_coeff: 0.009999999999999998
          kl: 0.0109142709075323
          policy_loss: -0.040072778943512175
          total_loss: -0.04006389723055893
          vf_explained_var: -0.28436824679374695
          vf_loss: 0.0007822483960707258
    num_agent_steps_sampled: 1529000
    num_agent_steps_trained: 1529000
    num_steps_sampled: 1529000
    num_steps_trained: 1529000
  iterations_since_restore: 1529

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1529,38326.7,1529000,-0.05,0,-5,313.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1530000
  custom_metrics: {}
  date: 2021-10-09_09-03-46
  done: false
  episode_len_mean: 315.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4309
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7957817090882195
          entropy_coeff: 0.009999999999999998
          kl: 0.010186822986694623
          policy_loss: -0.05273900574280156
          total_loss: -0.05601043161004782
          vf_explained_var: -0.2450810819864273
          vf_loss: 0.001454439306528204
    num_agent_steps_sampled: 1530000
    num_agent_steps_trained: 1530000
    num_steps_sampled: 1530000
    num_steps_trained: 1530000
  iterations_since_restore: 1530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1530,38351.1,1530000,-0.05,0,-5,315.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1531000
  custom_metrics: {}
  date: 2021-10-09_09-04-10
  done: false
  episode_len_mean: 315.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 4311
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5578129808108012
          entropy_coeff: 0.009999999999999998
          kl: 0.010468888650776935
          policy_loss: -0.07793899549999171
          total_loss: -0.07929061517740289
          vf_explained_var: -0.31380268931388855
          vf_loss: 0.0006281729299290519
    num_agent_steps_sampled: 1531000
    num_agent_steps_trained: 1531000
    num_steps_sampled: 1531000
    num_steps_trained: 1531000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1531,38374.9,1531000,-0.05,0,-5,315.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1532000
  custom_metrics: {}
  date: 2021-10-09_09-04-37
  done: false
  episode_len_mean: 315.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 4
  episodes_total: 4315
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5795955975850424
          entropy_coeff: 0.009999999999999998
          kl: 0.007972725628870907
          policy_loss: -0.033416244015097615
          total_loss: -0.03850132932679521
          vf_explained_var: -0.13986721634864807
          vf_loss: 0.00035487023495887925
    num_agent_steps_sampled: 1532000
    num_agent_steps_trained: 1532000
    num_steps_sampled: 1532000
    num_steps_trained: 1532000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1532,38402.2,1532000,-0.05,0,-5,315.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1533000
  custom_metrics: {}
  date: 2021-10-09_09-05-00
  done: false
  episode_len_mean: 316.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4318
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6823293685913085
          entropy_coeff: 0.009999999999999998
          kl: 0.007731993340937601
          policy_loss: -0.04100694914037983
          total_loss: -0.04724055843220817
          vf_explained_var: -0.8172131776809692
          vf_loss: 0.0005463782345436306
    num_agent_steps_sampled: 1533000
    num_agent_steps_trained: 1533000
    num_steps_sampled: 1533000
    num_steps_trained: 1533000
  iterations_since_restore: 153

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1533,38425.7,1533000,-0.05,0,-5,316.94




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1534000
  custom_metrics: {}
  date: 2021-10-09_09-05-44
  done: false
  episode_len_mean: 317.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4321
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5018938024838766
          entropy_coeff: 0.009999999999999998
          kl: 0.00847780578230173
          policy_loss: -0.09593821358349588
          total_loss: -0.09966139201488759
          vf_explained_var: -0.11671849340200424
          vf_loss: 0.0002836944406024284
    num_agent_steps_sampled: 1534000
    num_agent_steps_trained: 1534000
    num_steps_sampled: 1534000
    num_steps_trained: 1534000
  iterations_since_restore: 153

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1534,38469.5,1534000,-0.05,0,-5,317.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1535000
  custom_metrics: {}
  date: 2021-10-09_09-06-09
  done: false
  episode_len_mean: 318.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4324
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6382754074202643
          entropy_coeff: 0.009999999999999998
          kl: 0.010632893244259181
          policy_loss: -0.1465672068297863
          total_loss: -0.14882408943441178
          vf_explained_var: 0.24566777050495148
          vf_loss: 0.0003145021544898757
    num_agent_steps_sampled: 1535000
    num_agent_steps_trained: 1535000
    num_steps_sampled: 1535000
    num_steps_trained: 1535000
  iterations_since_restore: 1535

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1535,38494.2,1535000,-0.05,0,-5,318.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1536000
  custom_metrics: {}
  date: 2021-10-09_09-06-36
  done: false
  episode_len_mean: 319.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4327
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7174014117982652
          entropy_coeff: 0.009999999999999998
          kl: 0.007394849236222889
          policy_loss: -0.041469946482943165
          total_loss: -0.04855091847065422
          vf_explained_var: -0.2687840461730957
          vf_loss: 0.00048766248203012055
    num_agent_steps_sampled: 1536000
    num_agent_steps_trained: 1536000
    num_steps_sampled: 1536000
    num_steps_trained: 1536000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1536,38521,1536000,-0.05,0,-5,319.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1537000
  custom_metrics: {}
  date: 2021-10-09_09-07-00
  done: false
  episode_len_mean: 321.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4330
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4464815417925516
          entropy_coeff: 0.009999999999999998
          kl: 0.008668981903507166
          policy_loss: -0.14128043179710706
          total_loss: -0.1441548256824414
          vf_explained_var: -0.17944853007793427
          vf_loss: 0.000330032497570048
    num_agent_steps_sampled: 1537000
    num_agent_steps_trained: 1537000
    num_steps_sampled: 1537000
    num_steps_trained: 1537000
  iterations_since_restore: 1537

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1537,38545.4,1537000,-0.05,0,-5,321.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1538000
  custom_metrics: {}
  date: 2021-10-09_09-07-25
  done: false
  episode_len_mean: 322.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4333
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5516051914956834
          entropy_coeff: 0.009999999999999998
          kl: 0.005563419707482906
          policy_loss: -0.02591629813735684
          total_loss: -0.03381439629528257
          vf_explained_var: -0.7371851801872253
          vf_loss: 0.00039146890930068267
    num_agent_steps_sampled: 1538000
    num_agent_steps_trained: 1538000
    num_steps_sampled: 1538000
    num_steps_trained: 1538000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1538,38570.7,1538000,-0.05,0,-5,322.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1539000
  custom_metrics: {}
  date: 2021-10-09_09-07-50
  done: false
  episode_len_mean: 324.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4336
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.733160834842258
          entropy_coeff: 0.009999999999999998
          kl: 0.010260687697807608
          policy_loss: -0.08986620286272631
          total_loss: -0.09354822285887268
          vf_explained_var: -0.21209032833576202
          vf_loss: 0.00032169069389864386
    num_agent_steps_sampled: 1539000
    num_agent_steps_trained: 1539000
    num_steps_sampled: 1539000
    num_steps_trained: 1539000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1539,38594.8,1539000,-0.05,0,-5,324.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1540000
  custom_metrics: {}
  date: 2021-10-09_09-08-16
  done: false
  episode_len_mean: 324.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4339
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.5854961117108664
          entropy_coeff: 0.009999999999999998
          kl: 0.009313689413713414
          policy_loss: -0.0516485018034776
          total_loss: -0.05515669286251068
          vf_explained_var: -0.10419338941574097
          vf_loss: 0.00024895247374337893
    num_agent_steps_sampled: 1540000
    num_agent_steps_trained: 1540000
    num_steps_sampled: 1540000
    num_steps_trained: 1540000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1540,38621.6,1540000,-0.05,0,-5,324.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1541000
  custom_metrics: {}
  date: 2021-10-09_09-08-41
  done: false
  episode_len_mean: 324.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4342
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6262533240848118
          entropy_coeff: 0.009999999999999998
          kl: 0.00839026382948435
          policy_loss: -0.08905696684701575
          total_loss: -0.09426548766593139
          vf_explained_var: -0.7431522011756897
          vf_loss: 0.0001556583249314119
    num_agent_steps_sampled: 1541000
    num_agent_steps_trained: 1541000
    num_steps_sampled: 1541000
    num_steps_trained: 1541000
  iterations_since_restore: 1541

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1541,38645.9,1541000,-0.05,0,-5,324.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1542000
  custom_metrics: {}
  date: 2021-10-09_09-09-04
  done: false
  episode_len_mean: 324.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4345
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7354831973711649
          entropy_coeff: 0.009999999999999998
          kl: 0.009964467261534042
          policy_loss: -0.11046163108613756
          total_loss: -0.11470898650586606
          vf_explained_var: -0.6950640678405762
          vf_loss: 0.00016434759818366728
    num_agent_steps_sampled: 1542000
    num_agent_steps_trained: 1542000
    num_steps_sampled: 1542000
    num_steps_trained: 1542000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1542,38669.5,1542000,-0.05,0,-5,324.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1543000
  custom_metrics: {}
  date: 2021-10-09_09-09-32
  done: false
  episode_len_mean: 323.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 4
  episodes_total: 4349
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.3943905856874255
          entropy_coeff: 0.009999999999999998
          kl: 0.00844040076218765
          policy_loss: -0.036112126852903104
          total_loss: -0.038989016724129515
          vf_explained_var: -0.3689631521701813
          vf_loss: 0.00010354152642927753
    num_agent_steps_sampled: 1543000
    num_agent_steps_trained: 1543000
    num_steps_sampled: 1543000
    num_steps_trained: 1543000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1543,38696.8,1543000,-0.05,0,-5,323.67




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1544000
  custom_metrics: {}
  date: 2021-10-09_09-10-13
  done: false
  episode_len_mean: 323.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4352
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.6663640181223551
          entropy_coeff: 0.009999999999999998
          kl: 0.011270173818919232
          policy_loss: -0.04114157874137163
          total_loss: -0.042981385046409235
          vf_explained_var: -0.4013735353946686
          vf_loss: 0.00018468646796665982
    num_agent_steps_sampled: 1544000
    num_agent_steps_trained: 1544000
    num_steps_sampled: 1544000
    num_steps_trained: 1544000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1544,38738,1544000,-0.05,0,-5,323.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1545000
  custom_metrics: {}
  date: 2021-10-09_09-10-41
  done: false
  episode_len_mean: 321.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4355
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4548095252778794
          entropy_coeff: 0.009999999999999998
          kl: 0.009815893952359368
          policy_loss: -0.05603346774975459
          total_loss: -0.05773524956570731
          vf_explained_var: -0.09651477634906769
          vf_loss: 9.617168183871804e-05
    num_agent_steps_sampled: 1545000
    num_agent_steps_trained: 1545000
    num_steps_sampled: 1545000
    num_steps_trained: 1545000
  iterations_since_restore: 154

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1545,38765.6,1545000,-0.05,0,-5,321.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1546000
  custom_metrics: {}
  date: 2021-10-09_09-11-06
  done: false
  episode_len_mean: 322.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4358
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7975570638974507
          entropy_coeff: 0.009999999999999998
          kl: 0.010818956032472949
          policy_loss: -0.0817845003058513
          total_loss: -0.08546986488832367
          vf_explained_var: -0.6681343913078308
          vf_loss: 0.00023715866393306188
    num_agent_steps_sampled: 1546000
    num_agent_steps_trained: 1546000
    num_steps_sampled: 1546000
    num_steps_trained: 1546000
  iterations_since_restore: 154

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1546,38790.5,1546000,-0.05,0,-5,322.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1547000
  custom_metrics: {}
  date: 2021-10-09_09-11-31
  done: false
  episode_len_mean: 323.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4361
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7283411754502191
          entropy_coeff: 0.009999999999999998
          kl: 0.012713909010614642
          policy_loss: -0.13058622512552473
          total_loss: -0.13122131803797352
          vf_explained_var: -0.34840428829193115
          vf_loss: 0.00013386305662505847
    num_agent_steps_sampled: 1547000
    num_agent_steps_trained: 1547000
    num_steps_sampled: 1547000
    num_steps_trained: 1547000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1547,38816.3,1547000,-0.05,0,-5,323.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1548000
  custom_metrics: {}
  date: 2021-10-09_09-11-55
  done: false
  episode_len_mean: 324.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4364
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7114239745669895
          entropy_coeff: 0.009999999999999998
          kl: 0.011348885746102313
          policy_loss: -0.15854970707247654
          total_loss: -0.1607677929931217
          vf_explained_var: -0.7084887027740479
          vf_loss: 0.00015476273769713265
    num_agent_steps_sampled: 1548000
    num_agent_steps_trained: 1548000
    num_steps_sampled: 1548000
    num_steps_trained: 1548000
  iterations_since_restore: 154

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1548,38840.2,1548000,-0.05,0,-5,324.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1549000
  custom_metrics: {}
  date: 2021-10-09_09-12-22
  done: false
  episode_len_mean: 324.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4367
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4833443363507588
          entropy_coeff: 0.009999999999999998
          kl: 0.009836400604366899
          policy_loss: -0.034691255622439916
          total_loss: -0.03665060765213436
          vf_explained_var: 0.20163550972938538
          vf_loss: 9.731297733297752e-05
    num_agent_steps_sampled: 1549000
    num_agent_steps_trained: 1549000
    num_steps_sampled: 1549000
    num_steps_trained: 1549000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1549,38866.9,1549000,-0.05,0,-5,324.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1550000
  custom_metrics: {}
  date: 2021-10-09_09-12-45
  done: false
  episode_len_mean: 324.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4370
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7786653492185804
          entropy_coeff: 0.009999999999999998
          kl: 0.004738203527917189
          policy_loss: -0.17110413908958436
          total_loss: -0.1682378343409962
          vf_explained_var: -0.4387253522872925
          vf_loss: 0.014498371170904849
    num_agent_steps_sampled: 1550000
    num_agent_steps_trained: 1550000
    num_steps_sampled: 1550000
    num_steps_trained: 1550000
  iterations_since_restore: 1550
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1550,38890.4,1550000,-0.1,0,-5,324.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1551000
  custom_metrics: {}
  date: 2021-10-09_09-13-10
  done: false
  episode_len_mean: 326.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4373
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8157618284225463
          entropy_coeff: 0.009999999999999998
          kl: 0.014047869908177487
          policy_loss: -0.10482245553284883
          total_loss: -0.1121004425196184
          vf_explained_var: 0.09863575547933578
          vf_loss: 0.0017560433712787925
    num_agent_steps_sampled: 1551000
    num_agent_steps_trained: 1551000
    num_steps_sampled: 1551000
    num_steps_trained: 1551000
  iterations_since_restore: 1551


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1551,38915,1551000,-0.1,0,-5,326.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1552000
  custom_metrics: {}
  date: 2021-10-09_09-13-35
  done: false
  episode_len_mean: 326.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4376
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8310014247894286
          entropy_coeff: 0.009999999999999998
          kl: 0.01640690137981144
          policy_loss: -0.07411567394932111
          total_loss: -0.0807906253884236
          vf_explained_var: -0.7986562848091125
          vf_loss: 0.0009793705804946107
    num_agent_steps_sampled: 1552000
    num_agent_steps_trained: 1552000
    num_steps_sampled: 1552000
    num_steps_trained: 1552000
  iterations_since_restore: 1552
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1552,38940,1552000,-0.1,0,-5,326.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1553000
  custom_metrics: {}
  date: 2021-10-09_09-13-59
  done: false
  episode_len_mean: 327.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4379
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7674698021676805
          entropy_coeff: 0.009999999999999998
          kl: 0.014864415471104871
          policy_loss: -0.10294149985743893
          total_loss: -0.11044732868257497
          vf_explained_var: -0.7162997722625732
          vf_loss: 0.0005149625041263385
    num_agent_steps_sampled: 1553000
    num_agent_steps_trained: 1553000
    num_steps_sampled: 1553000
    num_steps_trained: 1553000
  iterations_since_restore: 1553

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1553,38964,1553000,-0.1,0,-5,327.19




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1554000
  custom_metrics: {}
  date: 2021-10-09_09-14-42
  done: false
  episode_len_mean: 327.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4382
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7927773608101738
          entropy_coeff: 0.009999999999999998
          kl: 0.014069738414673101
          policy_loss: 0.00685476182649533
          total_loss: -0.0014752424425548978
          vf_explained_var: -0.42588603496551514
          vf_loss: 0.0004599789553645274
    num_agent_steps_sampled: 1554000
    num_agent_steps_trained: 1554000
    num_steps_sampled: 1554000
    num_steps_trained: 1554000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1554,39006.8,1554000,-0.1,0,-5,327.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1555000
  custom_metrics: {}
  date: 2021-10-09_09-15-10
  done: false
  episode_len_mean: 326.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4385
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.5716342806816102
          entropy_coeff: 0.009999999999999998
          kl: 0.012619785057437927
          policy_loss: -0.09770782672696643
          total_loss: -0.10497694479094612
          vf_explained_var: 0.0979609489440918
          vf_loss: 0.00025112936232795215
    num_agent_steps_sampled: 1555000
    num_agent_steps_trained: 1555000
    num_steps_sampled: 1555000
    num_steps_trained: 1555000
  iterations_since_restore: 1555

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1555,39035.1,1555000,-0.1,0,-5,326.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1556000
  custom_metrics: {}
  date: 2021-10-09_09-15-33
  done: false
  episode_len_mean: 326.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4388
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8448158701260884
          entropy_coeff: 0.009999999999999998
          kl: 0.01685353906287277
          policy_loss: -0.15954503611558013
          total_loss: -0.16672890997595258
          vf_explained_var: -0.006488911807537079
          vf_loss: 0.00031851400029457485
    num_agent_steps_sampled: 1556000
    num_agent_steps_trained: 1556000
    num_steps_sampled: 1556000
    num_steps_trained: 1556000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1556,39057.5,1556000,-0.05,0,-5,326.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1557000
  custom_metrics: {}
  date: 2021-10-09_09-15-59
  done: false
  episode_len_mean: 327.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4391
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7044554154078166
          entropy_coeff: 0.009999999999999998
          kl: 0.019587301704757247
          policy_loss: -0.06342063269888361
          total_loss: -0.06739630212800371
          vf_explained_var: -0.22391726076602936
          vf_loss: 0.0003476333946713971
    num_agent_steps_sampled: 1557000
    num_agent_steps_trained: 1557000
    num_steps_sampled: 1557000
    num_steps_trained: 1557000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1557,39083.9,1557000,-0.05,0,-5,327.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1558000
  custom_metrics: {}
  date: 2021-10-09_09-16-24
  done: false
  episode_len_mean: 328.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4394
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.6994076821539137
          entropy_coeff: 0.009999999999999998
          kl: 0.015032573206616818
          policy_loss: -0.07660749458397428
          total_loss: -0.0835990587559839
          vf_explained_var: -0.614510715007782
          vf_loss: 0.0002393949504443703
    num_agent_steps_sampled: 1558000
    num_agent_steps_trained: 1558000
    num_steps_sampled: 1558000
    num_steps_trained: 1558000
  iterations_since_restore: 1558


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1558,39108.9,1558000,-0.05,0,-5,328.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1559000
  custom_metrics: {}
  date: 2021-10-09_09-16-48
  done: false
  episode_len_mean: 329.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4397
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7387216885884602
          entropy_coeff: 0.009999999999999998
          kl: 0.017277260721229263
          policy_loss: -0.11688318732712004
          total_loss: -0.12261506339742077
          vf_explained_var: -0.2174910455942154
          vf_loss: 0.00043437645840135197
    num_agent_steps_sampled: 1559000
    num_agent_steps_trained: 1559000
    num_steps_sampled: 1559000
    num_steps_trained: 1559000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1559,39132.9,1559000,-0.05,0,-5,329.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1560000
  custom_metrics: {}
  date: 2021-10-09_09-17-12
  done: false
  episode_len_mean: 328.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4400
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.6483213623364767
          entropy_coeff: 0.009999999999999998
          kl: 0.014189333919329172
          policy_loss: -0.0977936789393425
          total_loss: -0.10478602507048183
          vf_explained_var: -0.3445121645927429
          vf_loss: 0.00027540347437025046
    num_agent_steps_sampled: 1560000
    num_agent_steps_trained: 1560000
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
  iterations_since_restore: 156

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1560,39156.6,1560000,-0.05,0,-5,328.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1561000
  custom_metrics: {}
  date: 2021-10-09_09-17-36
  done: false
  episode_len_mean: 328.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 4403
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8236006458600362
          entropy_coeff: 0.009999999999999998
          kl: 0.013090720743691225
          policy_loss: -0.09996501248743798
          total_loss: -0.10949556092835135
          vf_explained_var: -0.5122944116592407
          vf_loss: 0.00020350299406951915
    num_agent_steps_sampled: 1561000
    num_agent_steps_trained: 1561000
    num_steps_sampled: 1561000
    num_steps_trained: 1561000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1561,39180.7,1561000,-0.05,0,-5,328.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1562000
  custom_metrics: {}
  date: 2021-10-09_09-18-02
  done: false
  episode_len_mean: 328.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4406
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.3406251390775046
          entropy_coeff: 0.009999999999999998
          kl: 0.014051964929446686
          policy_loss: 0.019739779912763173
          total_loss: 0.0933851974705855
          vf_explained_var: 0.1830453723669052
          vf_loss: 0.0779254199574805
    num_agent_steps_sampled: 1562000
    num_agent_steps_trained: 1562000
    num_steps_sampled: 1562000
    num_steps_trained: 1562000
  iterations_since_restore: 1562
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1562,39207.1,1562000,-0.12,0,-7,328.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1563000
  custom_metrics: {}
  date: 2021-10-09_09-18-25
  done: false
  episode_len_mean: 329.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4409
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.715095341205597
          entropy_coeff: 0.009999999999999998
          kl: 0.014181823281891326
          policy_loss: -0.07692274165650209
          total_loss: -0.07259417416320907
          vf_explained_var: 0.49445778131484985
          vf_loss: 0.012268932463808193
    num_agent_steps_sampled: 1563000
    num_agent_steps_trained: 1563000
    num_steps_sampled: 1563000
    num_steps_trained: 1563000
  iterations_since_restore: 1563


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1563,39229.3,1563000,-0.12,0,-7,329.93




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1564000
  custom_metrics: {}
  date: 2021-10-09_09-19-06
  done: false
  episode_len_mean: 330.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4411
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7864372809727986
          entropy_coeff: 0.009999999999999998
          kl: 0.016892584981543782
          policy_loss: -0.08784999731514188
          total_loss: -0.09087552295790778
          vf_explained_var: -0.8272566795349121
          vf_loss: 0.003867718488133202
    num_agent_steps_sampled: 1564000
    num_agent_steps_trained: 1564000
    num_steps_sampled: 1564000
    num_steps_trained: 1564000
  iterations_since_restore: 1564

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1564,39270.5,1564000,-0.12,0,-7,330.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1565000
  custom_metrics: {}
  date: 2021-10-09_09-19-30
  done: false
  episode_len_mean: 330.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4414
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.702647907204098
          entropy_coeff: 0.009999999999999998
          kl: 0.01728781200419702
          policy_loss: -0.1730322464886639
          total_loss: -0.1763655039171378
          vf_explained_var: -0.5830909013748169
          vf_loss: 0.0024654060485772788
    num_agent_steps_sampled: 1565000
    num_agent_steps_trained: 1565000
    num_steps_sampled: 1565000
    num_steps_trained: 1565000
  iterations_since_restore: 1565
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1565,39295.1,1565000,-0.12,0,-7,330.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1566000
  custom_metrics: {}
  date: 2021-10-09_09-19-52
  done: false
  episode_len_mean: 332.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4417
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.659550760851966
          entropy_coeff: 0.009999999999999998
          kl: 0.01079306409547619
          policy_loss: 0.021097106393426657
          total_loss: 0.013145657173461384
          vf_explained_var: -0.6232244372367859
          vf_loss: 0.0016343495459295808
    num_agent_steps_sampled: 1566000
    num_agent_steps_trained: 1566000
    num_steps_sampled: 1566000
    num_steps_trained: 1566000
  iterations_since_restore: 1566


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1566,39316.7,1566000,-0.12,0,-7,332.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1567000
  custom_metrics: {}
  date: 2021-10-09_09-20-15
  done: false
  episode_len_mean: 333.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4420
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.753547183672587
          entropy_coeff: 0.009999999999999998
          kl: 0.016925953764379012
          policy_loss: -0.11289314718710051
          total_loss: -0.11790803107950422
          vf_explained_var: -0.14694082736968994
          vf_loss: 0.0015277857445956519
    num_agent_steps_sampled: 1567000
    num_agent_steps_trained: 1567000
    num_steps_sampled: 1567000
    num_steps_trained: 1567000
  iterations_since_restore: 156

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1567,39340,1567000,-0.12,0,-7,333.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1568000
  custom_metrics: {}
  date: 2021-10-09_09-20-42
  done: false
  episode_len_mean: 333.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4423
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.5705754240353902
          entropy_coeff: 0.009999999999999998
          kl: 0.01289396716580263
          policy_loss: -0.05559395529950659
          total_loss: -0.06212263077290522
          vf_explained_var: -0.7546579837799072
          vf_loss: 0.0008029089729663813
    num_agent_steps_sampled: 1568000
    num_agent_steps_trained: 1568000
    num_steps_sampled: 1568000
    num_steps_trained: 1568000
  iterations_since_restore: 1568


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1568,39366.7,1568000,-0.12,0,-7,333.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1569000
  custom_metrics: {}
  date: 2021-10-09_09-21-05
  done: false
  episode_len_mean: 334.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4426
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.5973981963263617
          entropy_coeff: 0.009999999999999998
          kl: 0.016024125698587258
          policy_loss: -0.12612185051871672
          total_loss: -0.1301610371304883
          vf_explained_var: -0.016560830175876617
          vf_loss: 0.0015277020594415566
    num_agent_steps_sampled: 1569000
    num_agent_steps_trained: 1569000
    num_steps_sampled: 1569000
    num_steps_trained: 1569000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1569,39390,1569000,-0.12,0,-7,334.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1570000
  custom_metrics: {}
  date: 2021-10-09_09-21-30
  done: false
  episode_len_mean: 334.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4429
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8876978052986992
          entropy_coeff: 0.009999999999999998
          kl: 0.015003024067113438
          policy_loss: -0.029251216310593818
          total_loss: -0.0367647148668766
          vf_explained_var: -0.1491224318742752
          vf_loss: 0.0016195528422637531
    num_agent_steps_sampled: 1570000
    num_agent_steps_trained: 1570000
    num_steps_sampled: 1570000
    num_steps_trained: 1570000
  iterations_since_restore: 157

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1570,39414.4,1570000,-0.12,0,-7,334.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1571000
  custom_metrics: {}
  date: 2021-10-09_09-21-57
  done: false
  episode_len_mean: 333.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4432
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.310549392302831
          entropy_coeff: 0.009999999999999998
          kl: 0.01633776620111986
          policy_loss: 0.009810080793168809
          total_loss: 0.00772565859887335
          vf_explained_var: 0.5701010227203369
          vf_loss: 0.00041027972151318356
    num_agent_steps_sampled: 1571000
    num_agent_steps_trained: 1571000
    num_steps_sampled: 1571000
    num_steps_trained: 1571000
  iterations_since_restore: 1571
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1571,39441.1,1571000,-0.12,0,-7,333.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1572000
  custom_metrics: {}
  date: 2021-10-09_09-22-18
  done: false
  episode_len_mean: 334.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4435
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7297975248760646
          entropy_coeff: 0.009999999999999998
          kl: 0.016569457975698348
          policy_loss: -0.07569006697999106
          total_loss: -0.08140940194328626
          vf_explained_var: -0.12461137771606445
          vf_loss: 0.0008173692357053773
    num_agent_steps_sampled: 1572000
    num_agent_steps_trained: 1572000
    num_steps_sampled: 1572000
    num_steps_trained: 1572000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1572,39462.9,1572000,-0.12,0,-7,334.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1573000
  custom_metrics: {}
  date: 2021-10-09_09-22-39
  done: false
  episode_len_mean: 336.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4437
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7162925362586976
          entropy_coeff: 0.009999999999999998
          kl: 0.01338443060769359
          policy_loss: -0.06663544074528747
          total_loss: -0.07463224273588923
          vf_explained_var: -0.90995854139328
          vf_loss: 0.00047341812605736775
    num_agent_steps_sampled: 1573000
    num_agent_steps_trained: 1573000
    num_steps_sampled: 1573000
    num_steps_trained: 1573000
  iterations_since_restore: 1573


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1573,39483.7,1573000,-0.12,0,-7,336.51




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1574000
  custom_metrics: {}
  date: 2021-10-09_09-23-19
  done: false
  episode_len_mean: 338.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4440
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.5684445950720045
          entropy_coeff: 0.009999999999999998
          kl: 0.00982834517018579
          policy_loss: -0.1124004275434547
          total_loss: -0.12100220426089234
          vf_explained_var: -0.9025248885154724
          vf_loss: 0.0006995130860660639
    num_agent_steps_sampled: 1574000
    num_agent_steps_trained: 1574000
    num_steps_sampled: 1574000
    num_steps_trained: 1574000
  iterations_since_restore: 1574


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1574,39523,1574000,-0.12,0,-7,338.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1575000
  custom_metrics: {}
  date: 2021-10-09_09-23-44
  done: false
  episode_len_mean: 337.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4443
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.6036701718966166
          entropy_coeff: 0.009999999999999998
          kl: 0.01358298848875879
          policy_loss: -0.07084000135461489
          total_loss: -0.07692867488496834
          vf_explained_var: -0.4785032868385315
          vf_loss: 0.0011263657429177935
    num_agent_steps_sampled: 1575000
    num_agent_steps_trained: 1575000
    num_steps_sampled: 1575000
    num_steps_trained: 1575000
  iterations_since_restore: 1575

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1575,39548.3,1575000,-0.12,0,-7,337.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1576000
  custom_metrics: {}
  date: 2021-10-09_09-24-08
  done: false
  episode_len_mean: 337.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4446
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.5786970615386964
          entropy_coeff: 0.009999999999999998
          kl: 0.015134413762789794
          policy_loss: -0.1122575040285786
          total_loss: -0.11728738260765871
          vf_explained_var: -0.25406745076179504
          vf_loss: 0.0009278291176694135
    num_agent_steps_sampled: 1576000
    num_agent_steps_trained: 1576000
    num_steps_sampled: 1576000
    num_steps_trained: 1576000
  iterations_since_restore: 157

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1576,39571.9,1576000,-0.12,0,-7,337.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1577000
  custom_metrics: {}
  date: 2021-10-09_09-24-29
  done: false
  episode_len_mean: 340.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4448
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.696791402498881
          entropy_coeff: 0.009999999999999998
          kl: 0.01169472250442042
          policy_loss: -0.09663703329861165
          total_loss: -0.10559341191417641
          vf_explained_var: -0.8265739679336548
          vf_loss: 0.0004162305403345575
    num_agent_steps_sampled: 1577000
    num_agent_steps_trained: 1577000
    num_steps_sampled: 1577000
    num_steps_trained: 1577000
  iterations_since_restore: 1577


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1577,39593.8,1577000,-0.12,0,-7,340.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1578000
  custom_metrics: {}
  date: 2021-10-09_09-24-54
  done: false
  episode_len_mean: 341.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4451
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.6921436852878995
          entropy_coeff: 0.009999999999999998
          kl: 0.01623776582019306
          policy_loss: -0.11017155759036541
          total_loss: -0.1155346941202879
          vf_explained_var: -0.17066910862922668
          vf_loss: 0.001012451443885867
    num_agent_steps_sampled: 1578000
    num_agent_steps_trained: 1578000
    num_steps_sampled: 1578000
    num_steps_trained: 1578000
  iterations_since_restore: 1578


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1578,39618.4,1578000,-0.12,0,-7,341.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1579000
  custom_metrics: {}
  date: 2021-10-09_09-25-19
  done: false
  episode_len_mean: 342.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4454
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7343488653500876
          entropy_coeff: 0.009999999999999998
          kl: 0.0107806496309954
          policy_loss: -0.012494465957085291
          total_loss: -0.02262326031923294
          vf_explained_var: -0.9821412563323975
          vf_loss: 0.00021304993489239778
    num_agent_steps_sampled: 1579000
    num_agent_steps_trained: 1579000
    num_steps_sampled: 1579000
    num_steps_trained: 1579000
  iterations_since_restore: 157

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1579,39643,1579000,-0.12,0,-7,342.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1580000
  custom_metrics: {}
  date: 2021-10-09_09-25-42
  done: false
  episode_len_mean: 342.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4457
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8989849130312602
          entropy_coeff: 0.009999999999999998
          kl: 0.017793321232615532
          policy_loss: -0.0923931342135701
          total_loss: -0.09943981420041786
          vf_explained_var: -0.5525200963020325
          vf_loss: 0.00038704287225199447
    num_agent_steps_sampled: 1580000
    num_agent_steps_trained: 1580000
    num_steps_sampled: 1580000
    num_steps_trained: 1580000
  iterations_since_restore: 158

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1580,39666.5,1580000,-0.12,0,-7,342.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1581000
  custom_metrics: {}
  date: 2021-10-09_09-26-03
  done: false
  episode_len_mean: 344.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4460
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8258880694707236
          entropy_coeff: 0.009999999999999998
          kl: 0.016055941976003397
          policy_loss: -0.06108979516559177
          total_loss: -0.06848756281865967
          vf_explained_var: -0.8180670738220215
          vf_loss: 0.00043335327403231836
    num_agent_steps_sampled: 1581000
    num_agent_steps_trained: 1581000
    num_steps_sampled: 1581000
    num_steps_trained: 1581000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1581,39687.8,1581000,-0.12,0,-7,344.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1582000
  custom_metrics: {}
  date: 2021-10-09_09-26-28
  done: false
  episode_len_mean: 344.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4462
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.9043320854504904
          entropy_coeff: 0.009999999999999998
          kl: 0.016514648222242675
          policy_loss: -0.10469691254612472
          total_loss: -0.11273497454822064
          vf_explained_var: -0.4519033133983612
          vf_loss: 0.0002795861010478499
    num_agent_steps_sampled: 1582000
    num_agent_steps_trained: 1582000
    num_steps_sampled: 1582000
    num_steps_trained: 1582000
  iterations_since_restore: 158

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1582,39711.9,1582000,-0.12,0,-7,344.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1583000
  custom_metrics: {}
  date: 2021-10-09_09-26-53
  done: false
  episode_len_mean: 344.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 4466
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7389856020609538
          entropy_coeff: 0.009999999999999998
          kl: 0.013146684716593545
          policy_loss: 0.016466592169470256
          total_loss: 0.007758716866374016
          vf_explained_var: -0.13646553456783295
          vf_loss: 0.00014368117780476394
    num_agent_steps_sampled: 1583000
    num_agent_steps_trained: 1583000
    num_steps_sampled: 1583000
    num_steps_trained: 1583000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1583,39737.2,1583000,-0.12,0,-7,344.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1584000
  custom_metrics: {}
  date: 2021-10-09_09-27-16
  done: false
  episode_len_mean: 345.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4468
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8012349976433648
          entropy_coeff: 0.009999999999999998
          kl: 0.014458456629480192
          policy_loss: -0.004566146598921882
          total_loss: -0.012970035605960422
          vf_explained_var: -0.8394749760627747
          vf_loss: 0.00021821226125919363
    num_agent_steps_sampled: 1584000
    num_agent_steps_trained: 1584000
    num_steps_sampled: 1584000
    num_steps_trained: 1584000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1584,39760.6,1584000,-0.12,0,-7,345.81




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1585000
  custom_metrics: {}
  date: 2021-10-09_09-27-58
  done: false
  episode_len_mean: 345.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4471
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.6381256977717082
          entropy_coeff: 0.009999999999999998
          kl: 0.012049878086036782
          policy_loss: -0.013050603804488976
          total_loss: -0.02142846311132113
          vf_explained_var: -0.4217802584171295
          vf_loss: 0.00017743270275079543
    num_agent_steps_sampled: 1585000
    num_agent_steps_trained: 1585000
    num_steps_sampled: 1585000
    num_steps_trained: 1585000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1585,39802.4,1585000,-0.07,0,-7,345.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1586000
  custom_metrics: {}
  date: 2021-10-09_09-28-22
  done: false
  episode_len_mean: 346.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4474
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8673397673500909
          entropy_coeff: 0.009999999999999998
          kl: 0.012162743797551744
          policy_loss: -0.0825022202399042
          total_loss: -0.0929828323630823
          vf_explained_var: -0.7181033492088318
          vf_loss: 0.0002935204641996986
    num_agent_steps_sampled: 1586000
    num_agent_steps_trained: 1586000
    num_steps_sampled: 1586000
    num_steps_trained: 1586000
  iterations_since_restore: 1586


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1586,39826.4,1586000,-0.07,0,-7,346.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1587000
  custom_metrics: {}
  date: 2021-10-09_09-28-46
  done: false
  episode_len_mean: 346.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4477
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8294724424680073
          entropy_coeff: 0.009999999999999998
          kl: 0.016746689103437795
          policy_loss: -0.10695631160504288
          total_loss: -0.11419809758663177
          vf_explained_var: -0.5532010197639465
          vf_loss: 0.0001765612428749187
    num_agent_steps_sampled: 1587000
    num_agent_steps_trained: 1587000
    num_steps_sampled: 1587000
    num_steps_trained: 1587000
  iterations_since_restore: 158

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1587,39850.6,1587000,-0.07,0,-7,346.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1588000
  custom_metrics: {}
  date: 2021-10-09_09-29-11
  done: false
  episode_len_mean: 346.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4480
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7417418599128722
          entropy_coeff: 0.009999999999999998
          kl: 0.01690694075448253
          policy_loss: -0.09626863681607777
          total_loss: -0.10238276980817318
          vf_explained_var: -0.1291782408952713
          vf_loss: 0.0003228339184715878
    num_agent_steps_sampled: 1588000
    num_agent_steps_trained: 1588000
    num_steps_sampled: 1588000
    num_steps_trained: 1588000
  iterations_since_restore: 1588


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1588,39875,1588000,-0.07,0,-7,346.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1589000
  custom_metrics: {}
  date: 2021-10-09_09-29-33
  done: false
  episode_len_mean: 348.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4483
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.6720849964353772
          entropy_coeff: 0.009999999999999998
          kl: 0.016148366069620737
          policy_loss: -0.07106827418837282
          total_loss: -0.07715204478138023
          vf_explained_var: -0.45222464203834534
          vf_loss: 0.00014929301550081517
    num_agent_steps_sampled: 1589000
    num_agent_steps_trained: 1589000
    num_steps_sampled: 1589000
    num_steps_trained: 1589000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1589,39897.4,1589000,-0.07,0,-7,348.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1590000
  custom_metrics: {}
  date: 2021-10-09_09-29-55
  done: false
  episode_len_mean: 350.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4485
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.718068716261122
          entropy_coeff: 0.009999999999999998
          kl: 0.013446392180508344
          policy_loss: -0.07294177160494858
          total_loss: -0.08119590361085204
          vf_explained_var: -0.583313524723053
          vf_loss: 0.00019360629780243874
    num_agent_steps_sampled: 1590000
    num_agent_steps_trained: 1590000
    num_steps_sampled: 1590000
    num_steps_trained: 1590000
  iterations_since_restore: 1590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1590,39919.1,1590000,-0.07,0,-7,350.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1591000
  custom_metrics: {}
  date: 2021-10-09_09-30-18
  done: false
  episode_len_mean: 350.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4488
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.536990206109153
          entropy_coeff: 0.009999999999999998
          kl: 0.01686676009219236
          policy_loss: -0.06761330482032564
          total_loss: -0.07176492280430263
          vf_explained_var: -0.4085894823074341
          vf_loss: 0.0002639257770270989
    num_agent_steps_sampled: 1591000
    num_agent_steps_trained: 1591000
    num_steps_sampled: 1591000
    num_steps_trained: 1591000
  iterations_since_restore: 1591


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1591,39942.4,1591000,-0.07,0,-7,350.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1592000
  custom_metrics: {}
  date: 2021-10-09_09-30-44
  done: false
  episode_len_mean: 350.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4491
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.612926604350408
          entropy_coeff: 0.009999999999999998
          kl: 0.017545561403239714
          policy_loss: -0.073811472993758
          total_loss: -0.0782535072416067
          vf_explained_var: -0.41725030541419983
          vf_loss: 0.0002920156978588137
    num_agent_steps_sampled: 1592000
    num_agent_steps_trained: 1592000
    num_steps_sampled: 1592000
    num_steps_trained: 1592000
  iterations_since_restore: 1592
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1592,39968.2,1592000,-0.07,0,-7,350.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1593000
  custom_metrics: {}
  date: 2021-10-09_09-31-09
  done: false
  episode_len_mean: 351.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4494
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7821138011084663
          entropy_coeff: 0.009999999999999998
          kl: 0.01274147838078687
          policy_loss: -0.0596378940054112
          total_loss: -0.06909021764165825
          vf_explained_var: -0.5162422060966492
          vf_loss: 9.368106135904478e-05
    num_agent_steps_sampled: 1593000
    num_agent_steps_trained: 1593000
    num_steps_sampled: 1593000
    num_steps_trained: 1593000
  iterations_since_restore: 1593


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1593,39992.8,1593000,-0.07,0,-7,351.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1594000
  custom_metrics: {}
  date: 2021-10-09_09-31-35
  done: false
  episode_len_mean: 349.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4497
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.6830801831351385
          entropy_coeff: 0.009999999999999998
          kl: 0.014714757441740526
          policy_loss: -0.11147881667647097
          total_loss: -0.11862935767405564
          vf_explained_var: -0.5874577760696411
          vf_loss: 0.00012355281424889755
    num_agent_steps_sampled: 1594000
    num_agent_steps_trained: 1594000
    num_steps_sampled: 1594000
    num_steps_trained: 1594000
  iterations_since_restore: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1594,40019.2,1594000,-0.07,0,-7,349.34




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1595000
  custom_metrics: {}
  date: 2021-10-09_09-32-19
  done: false
  episode_len_mean: 348.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4500
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.7614276382658216
          entropy_coeff: 0.009999999999999998
          kl: 0.014790298481773398
          policy_loss: -0.04677104614675045
          total_loss: -0.05466177206900385
          vf_explained_var: -0.6882734298706055
          vf_loss: 0.0001177794373587757
    num_agent_steps_sampled: 1595000
    num_agent_steps_trained: 1595000
    num_steps_sampled: 1595000
    num_steps_trained: 1595000
  iterations_since_restore: 159

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1595,40062.9,1595000,-0.07,0,-7,348.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1596000
  custom_metrics: {}
  date: 2021-10-09_09-32-43
  done: false
  episode_len_mean: 348.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4503
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.8092234359847175
          entropy_coeff: 0.009999999999999998
          kl: 0.013537815127906604
          policy_loss: -0.06887308452278376
          total_loss: -0.07808658242639568
          vf_explained_var: -0.7003566026687622
          vf_loss: 8.640924503803642e-05
    num_agent_steps_sampled: 1596000
    num_agent_steps_trained: 1596000
    num_steps_sampled: 1596000
    num_steps_trained: 1596000
  iterations_since_restore: 159

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1596,40086.8,1596000,-0.07,0,-7,348.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1597000
  custom_metrics: {}
  date: 2021-10-09_09-33-06
  done: false
  episode_len_mean: 349.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4506
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.6943756196233961
          entropy_coeff: 0.009999999999999998
          kl: 0.02078551308099282
          policy_loss: -0.04890141160123878
          total_loss: -0.052078059232897225
          vf_explained_var: -0.6812515258789062
          vf_loss: 0.0002676600664724699
    num_agent_steps_sampled: 1597000
    num_agent_steps_trained: 1597000
    num_steps_sampled: 1597000
    num_steps_trained: 1597000
  iterations_since_restore: 1597
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1597,40109.8,1597000,0,0,0,349.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1598000
  custom_metrics: {}
  date: 2021-10-09_09-33-32
  done: false
  episode_len_mean: 348.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4509
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.5908913294474283
          entropy_coeff: 0.009999999999999998
          kl: 0.01222837929620844
          policy_loss: -0.10089035383943054
          total_loss: -0.10476247796581852
          vf_explained_var: -0.17888520658016205
          vf_loss: 0.00012394435955987622
    num_agent_steps_sampled: 1598000
    num_agent_steps_trained: 1598000
    num_steps_sampled: 1598000
    num_steps_trained: 1598000
  iterations_since_restore: 1598


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1598,40136.4,1598000,0,0,0,348.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1599000
  custom_metrics: {}
  date: 2021-10-09_09-33-58
  done: false
  episode_len_mean: 347.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4512
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8372023675176832
          entropy_coeff: 0.009999999999999998
          kl: 0.010962301666391106
          policy_loss: -0.09204770106201371
          total_loss: -0.09965296333862675
          vf_explained_var: -0.9972059726715088
          vf_loss: 8.732916675297828e-05
    num_agent_steps_sampled: 1599000
    num_agent_steps_trained: 1599000
    num_steps_sampled: 1599000
    num_steps_trained: 1599000
  iterations_since_restore: 1599
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1599,40161.9,1599000,0,0,0,347.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1600000
  custom_metrics: {}
  date: 2021-10-09_09-34-23
  done: false
  episode_len_mean: 347.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4515
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.6882981525527105
          entropy_coeff: 0.009999999999999998
          kl: 0.008714934568526504
          policy_loss: -0.03635723808159431
          total_loss: -0.04468591366377142
          vf_explained_var: -0.701729416847229
          vf_loss: 6.424706441773904e-05
    num_agent_steps_sampled: 1600000
    num_agent_steps_trained: 1600000
    num_steps_sampled: 1600000
    num_steps_trained: 1600000
  iterations_since_restore: 1600
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1600,40186.6,1600000,0,0,0,347.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1601000
  custom_metrics: {}
  date: 2021-10-09_09-34-47
  done: false
  episode_len_mean: 345.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4518
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7856148428387113
          entropy_coeff: 0.009999999999999998
          kl: 0.012378081248177233
          policy_loss: -0.07687730739514033
          total_loss: -0.08254288281831476
          vf_explained_var: -0.41233184933662415
          vf_loss: 0.00013188919955508835
    num_agent_steps_sampled: 1601000
    num_agent_steps_trained: 1601000
    num_steps_sampled: 1601000
    num_steps_trained: 1601000
  iterations_since_restore: 1601

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1601,40211,1601000,0,0,0,345.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1602000
  custom_metrics: {}
  date: 2021-10-09_09-35-12
  done: false
  episode_len_mean: 346.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4521
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7649597618314954
          entropy_coeff: 0.009999999999999998
          kl: 0.011898785308743222
          policy_loss: -0.07888949412024683
          total_loss: -0.08487065914604398
          vf_explained_var: -0.790647029876709
          vf_loss: 7.667828336303097e-05
    num_agent_steps_sampled: 1602000
    num_agent_steps_trained: 1602000
    num_steps_sampled: 1602000
    num_steps_trained: 1602000
  iterations_since_restore: 1602
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1602,40235.8,1602000,0,0,0,346.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1603000
  custom_metrics: {}
  date: 2021-10-09_09-35-34
  done: false
  episode_len_mean: 347.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4524
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8763124995761447
          entropy_coeff: 0.009999999999999998
          kl: 0.012579282538554379
          policy_loss: -0.039166046327186954
          total_loss: -0.04556930901275741
          vf_explained_var: -0.4104922413825989
          vf_loss: 0.0001051704233557555
    num_agent_steps_sampled: 1603000
    num_agent_steps_trained: 1603000
    num_steps_sampled: 1603000
    num_steps_trained: 1603000
  iterations_since_restore: 1603


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1603,40257.5,1603000,0,0,0,347.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1604000
  custom_metrics: {}
  date: 2021-10-09_09-35-59
  done: false
  episode_len_mean: 346.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4527
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.6985896176762052
          entropy_coeff: 0.009999999999999998
          kl: 0.011229127603745049
          policy_loss: -0.04967649396922853
          total_loss: -0.05565078440639708
          vf_explained_var: -0.4618760347366333
          vf_loss: 7.222978616078358e-05
    num_agent_steps_sampled: 1604000
    num_agent_steps_trained: 1604000
    num_steps_sampled: 1604000
    num_steps_trained: 1604000
  iterations_since_restore: 1604
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1604,40282.5,1604000,0,0,0,346.74




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1605000
  custom_metrics: {}
  date: 2021-10-09_09-36-40
  done: false
  episode_len_mean: 346.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4530
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.803394960032569
          entropy_coeff: 0.009999999999999998
          kl: 0.014715344381566
          policy_loss: -0.09549501418239541
          total_loss: -0.09911163002252579
          vf_explained_var: -0.31983157992362976
          vf_loss: 8.169993367725207e-05
    num_agent_steps_sampled: 1605000
    num_agent_steps_trained: 1605000
    num_steps_sampled: 1605000
    num_steps_trained: 1605000
  iterations_since_restore: 1605
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1605,40324,1605000,0,0,0,346.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1606000
  custom_metrics: {}
  date: 2021-10-09_09-37-06
  done: false
  episode_len_mean: 348.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4533
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7213103201654223
          entropy_coeff: 0.009999999999999998
          kl: 0.008933095508892248
          policy_loss: -0.042862954922020434
          total_loss: -0.0513203999441531
          vf_explained_var: -0.9902772903442383
          vf_loss: 5.306884743024259e-05
    num_agent_steps_sampled: 1606000
    num_agent_steps_trained: 1606000
    num_steps_sampled: 1606000
    num_steps_trained: 1606000
  iterations_since_restore: 1606
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1606,40349.9,1606000,0,0,0,348.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1607000
  custom_metrics: {}
  date: 2021-10-09_09-37-31
  done: false
  episode_len_mean: 345.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4536
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8853399833043416
          entropy_coeff: 0.009999999999999998
          kl: 0.012417185731475438
          policy_loss: -0.08427736825413173
          total_loss: -0.09095174057616127
          vf_explained_var: -0.44393810629844666
          vf_loss: 8.225089109651486e-05
    num_agent_steps_sampled: 1607000
    num_agent_steps_trained: 1607000
    num_steps_sampled: 1607000
    num_steps_trained: 1607000
  iterations_since_restore: 1607


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1607,40375.1,1607000,0,0,0,345.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1608000
  custom_metrics: {}
  date: 2021-10-09_09-37-55
  done: false
  episode_len_mean: 344.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4539
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.748476791381836
          entropy_coeff: 0.009999999999999998
          kl: 0.013690891772156169
          policy_loss: -0.07202041476137108
          total_loss: -0.07607719736794631
          vf_explained_var: -0.3662179410457611
          vf_loss: 9.036909448493842e-05
    num_agent_steps_sampled: 1608000
    num_agent_steps_trained: 1608000
    num_steps_sampled: 1608000
    num_steps_trained: 1608000
  iterations_since_restore: 1608
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1608,40398.3,1608000,0,0,0,344.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1609000
  custom_metrics: {}
  date: 2021-10-09_09-38-20
  done: false
  episode_len_mean: 344.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4542
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7704524954160055
          entropy_coeff: 0.009999999999999998
          kl: 0.006921893118717016
          policy_loss: -0.036872786035140354
          total_loss: -0.047743297678728897
          vf_explained_var: -0.1755007803440094
          vf_loss: 9.073209661841651e-05
    num_agent_steps_sampled: 1609000
    num_agent_steps_trained: 1609000
    num_steps_sampled: 1609000
    num_steps_trained: 1609000
  iterations_since_restore: 1609

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1609,40424.1,1609000,0,0,0,344.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1610000
  custom_metrics: {}
  date: 2021-10-09_09-38-46
  done: false
  episode_len_mean: 344.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4545
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.561839184496138
          entropy_coeff: 0.009999999999999998
          kl: 0.011723423890477314
          policy_loss: -0.12743257209658623
          total_loss: -0.13146220420797666
          vf_explained_var: 0.37489455938339233
          vf_loss: 0.00016784054439792978
    num_agent_steps_sampled: 1610000
    num_agent_steps_trained: 1610000
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
  iterations_since_restore: 1610
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1610,40449.8,1610000,0,0,0,344.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1611000
  custom_metrics: {}
  date: 2021-10-09_09-39-09
  done: false
  episode_len_mean: 344.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4547
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 2.0327442235416835
          entropy_coeff: 0.009999999999999998
          kl: 0.007344358506445629
          policy_loss: -0.005742228496819735
          total_loss: -0.01884216441669398
          vf_explained_var: -0.6759915351867676
          vf_loss: 7.265925386390235e-05
    num_agent_steps_sampled: 1611000
    num_agent_steps_trained: 1611000
    num_steps_sampled: 1611000
    num_steps_trained: 1611000
  iterations_since_restore: 1611


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1611,40472.4,1611000,0,0,0,344.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1612000
  custom_metrics: {}
  date: 2021-10-09_09-39-32
  done: false
  episode_len_mean: 343.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4550
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.834696998861101
          entropy_coeff: 0.009999999999999998
          kl: 0.008942822561365915
          policy_loss: -0.09463745125879844
          total_loss: -0.10423344732779595
          vf_explained_var: -0.863551676273346
          vf_loss: 3.8909213081852716e-05
    num_agent_steps_sampled: 1612000
    num_agent_steps_trained: 1612000
    num_steps_sampled: 1612000
    num_steps_trained: 1612000
  iterations_since_restore: 1612
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1612,40495.2,1612000,0,0,0,343.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1613000
  custom_metrics: {}
  date: 2021-10-09_09-39-55
  done: false
  episode_len_mean: 344.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4553
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.9216107779079012
          entropy_coeff: 0.009999999999999998
          kl: 0.012245835596974312
          policy_loss: -0.045745747153543764
          total_loss: -0.05298587771960431
          vf_explained_var: -0.8226743936538696
          vf_loss: 4.613109277771501e-05
    num_agent_steps_sampled: 1613000
    num_agent_steps_trained: 1613000
    num_steps_sampled: 1613000
    num_steps_trained: 1613000
  iterations_since_restore: 1613


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1613,40518.5,1613000,0,0,0,344.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1614000
  custom_metrics: {}
  date: 2021-10-09_09-40-24
  done: false
  episode_len_mean: 342.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4557
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.526337785191006
          entropy_coeff: 0.009999999999999998
          kl: 0.016035161780246592
          policy_loss: -0.07162578596423069
          total_loss: -0.07103769174880452
          vf_explained_var: 0.3990926444530487
          vf_loss: 0.00023007673387635602
    num_agent_steps_sampled: 1614000
    num_agent_steps_trained: 1614000
    num_steps_sampled: 1614000
    num_steps_trained: 1614000
  iterations_since_restore: 1614
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1614,40547.2,1614000,0,0,0,342.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1615000
  custom_metrics: {}
  date: 2021-10-09_09-40-47
  done: false
  episode_len_mean: 341.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4559
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8140642642974854
          entropy_coeff: 0.009999999999999998
          kl: 0.01204202287725147
          policy_loss: -0.10940670871900188
          total_loss: -0.1157011547850238
          vf_explained_var: -0.029102012515068054
          vf_loss: 0.00011490408620981421
    num_agent_steps_sampled: 1615000
    num_agent_steps_trained: 1615000
    num_steps_sampled: 1615000
    num_steps_trained: 1615000
  iterations_since_restore: 1615


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1615,40570.3,1615000,0,0,0,341.14




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1616000
  custom_metrics: {}
  date: 2021-10-09_09-41-26
  done: false
  episode_len_mean: 341.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4562
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8871598283449809
          entropy_coeff: 0.009999999999999998
          kl: 0.00956143214530321
          policy_loss: -0.057105614783035386
          total_loss: -0.06661377333932453
          vf_explained_var: -0.6064769625663757
          vf_loss: 4.872748534883916e-05
    num_agent_steps_sampled: 1616000
    num_agent_steps_trained: 1616000
    num_steps_sampled: 1616000
    num_steps_trained: 1616000
  iterations_since_restore: 1616
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1616,40609.4,1616000,0,0,0,341.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1617000
  custom_metrics: {}
  date: 2021-10-09_09-41-50
  done: false
  episode_len_mean: 342.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4565
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7769170032607184
          entropy_coeff: 0.009999999999999998
          kl: 0.010920285712276248
          policy_loss: -0.061110104920549524
          total_loss: -0.06821075825848513
          vf_explained_var: -0.5289825797080994
          vf_loss: 3.001432783599335e-05
    num_agent_steps_sampled: 1617000
    num_agent_steps_trained: 1617000
    num_steps_sampled: 1617000
    num_steps_trained: 1617000
  iterations_since_restore: 1617


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1617,40633.9,1617000,0,0,0,342.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1618000
  custom_metrics: {}
  date: 2021-10-09_09-42-14
  done: false
  episode_len_mean: 341.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4568
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8140205144882202
          entropy_coeff: 0.009999999999999998
          kl: 0.01181215484556317
          policy_loss: -0.09165589643849267
          total_loss: -0.09826328559882111
          vf_explained_var: -0.6446152925491333
          vf_loss: 2.5457916995542796e-05
    num_agent_steps_sampled: 1618000
    num_agent_steps_trained: 1618000
    num_steps_sampled: 1618000
    num_steps_trained: 1618000
  iterations_since_restore: 1618
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1618,40657.6,1618000,0,0,0,341.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1619000
  custom_metrics: {}
  date: 2021-10-09_09-42-39
  done: false
  episode_len_mean: 341.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4571
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8472199241320293
          entropy_coeff: 0.009999999999999998
          kl: 0.01339565531522232
          policy_loss: -0.11260307505726815
          total_loss: -0.11799060342212518
          vf_explained_var: -0.2860768139362335
          vf_loss: 3.467533983388825e-05
    num_agent_steps_sampled: 1619000
    num_agent_steps_trained: 1619000
    num_steps_sampled: 1619000
    num_steps_trained: 1619000
  iterations_since_restore: 1619
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1619,40682.6,1619000,0,0,0,341.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1620000
  custom_metrics: {}
  date: 2021-10-09_09-43-07
  done: false
  episode_len_mean: 339.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4574
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.5503819598091972
          entropy_coeff: 0.009999999999999998
          kl: 0.011478658887642722
          policy_loss: -0.061752959630555576
          total_loss: -0.06604540037612121
          vf_explained_var: -0.518502950668335
          vf_loss: 2.8908155703296263e-05
    num_agent_steps_sampled: 1620000
    num_agent_steps_trained: 1620000
    num_steps_sampled: 1620000
    num_steps_trained: 1620000
  iterations_since_restore: 1620


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1620,40710.2,1620000,0,0,0,339.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1621000
  custom_metrics: {}
  date: 2021-10-09_09-43-30
  done: false
  episode_len_mean: 340.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4577
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.9469108687506782
          entropy_coeff: 0.009999999999999998
          kl: 0.008349401117355824
          policy_loss: -0.022359602525830267
          total_loss: -0.03364613684308198
          vf_explained_var: -1.0
          vf_loss: 4.861908607078173e-05
    num_agent_steps_sampled: 1621000
    num_agent_steps_trained: 1621000
    num_steps_sampled: 1621000
    num_steps_trained: 1621000
  iterations_since_restore: 1621
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1621,40734,1621000,0,0,0,340.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1622000
  custom_metrics: {}
  date: 2021-10-09_09-43-56
  done: false
  episode_len_mean: 339.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4580
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.88449039194319
          entropy_coeff: 0.009999999999999998
          kl: 0.01310191123570509
          policy_loss: -0.06887978870007727
          total_loss: -0.0749256985882918
          vf_explained_var: -0.45785877108573914
          vf_loss: 3.516026889782451e-05
    num_agent_steps_sampled: 1622000
    num_agent_steps_trained: 1622000
    num_steps_sampled: 1622000
    num_steps_trained: 1622000
  iterations_since_restore: 1622
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1622,40759.3,1622000,0,0,0,339.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1623000
  custom_metrics: {}
  date: 2021-10-09_09-44-20
  done: false
  episode_len_mean: 338.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4583
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.6607262704107497
          entropy_coeff: 0.009999999999999998
          kl: 0.006645998833345423
          policy_loss: -0.14108136039641167
          total_loss: -0.1511854824092653
          vf_explained_var: -0.355503112077713
          vf_loss: 2.8632731118705124e-05
    num_agent_steps_sampled: 1623000
    num_agent_steps_trained: 1623000
    num_steps_sampled: 1623000
    num_steps_trained: 1623000
  iterations_since_restore: 1623
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1623,40783.2,1623000,0,0,0,338.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1624000
  custom_metrics: {}
  date: 2021-10-09_09-44-46
  done: false
  episode_len_mean: 336.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4586
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7850160625245837
          entropy_coeff: 0.009999999999999998
          kl: 0.011786978382854032
          policy_loss: -0.08325257996718088
          total_loss: -0.08929304803411166
          vf_explained_var: -0.224822998046875
          vf_loss: 0.0003268643160683698
    num_agent_steps_sampled: 1624000
    num_agent_steps_trained: 1624000
    num_steps_sampled: 1624000
    num_steps_trained: 1624000
  iterations_since_restore: 1624
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1624,40809.9,1624000,0,0,0,336.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1625000
  custom_metrics: {}
  date: 2021-10-09_09-45-12
  done: false
  episode_len_mean: 336.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4589
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.9261529233720567
          entropy_coeff: 0.009999999999999998
          kl: 0.01129512417432126
          policy_loss: -0.05764800798561838
          total_loss: -0.06586722363200452
          vf_explained_var: -0.44960325956344604
          vf_loss: 3.864379919428352e-05
    num_agent_steps_sampled: 1625000
    num_agent_steps_trained: 1625000
    num_steps_sampled: 1625000
    num_steps_trained: 1625000
  iterations_since_restore: 1625
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1625,40835.2,1625000,0,0,0,336.96




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1626000
  custom_metrics: {}
  date: 2021-10-09_09-45-54
  done: false
  episode_len_mean: 335.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4592
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7987056427531771
          entropy_coeff: 0.009999999999999998
          kl: 0.009396693113237476
          policy_loss: -0.09092369224462245
          total_loss: -0.09968282700412803
          vf_explained_var: 0.43990543484687805
          vf_loss: 7.369898063795215e-05
    num_agent_steps_sampled: 1626000
    num_agent_steps_trained: 1626000
    num_steps_sampled: 1626000
    num_steps_trained: 1626000
  iterations_since_restore: 1626
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1626,40877.2,1626000,0,0,0,335.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1627000
  custom_metrics: {}
  date: 2021-10-09_09-46-17
  done: false
  episode_len_mean: 337.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4595
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8522667341762118
          entropy_coeff: 0.009999999999999998
          kl: 0.009708402815820666
          policy_loss: -0.05124833600388633
          total_loss: -0.06028409817566474
          vf_explained_var: -0.6522107720375061
          vf_loss: 2.9016445912727957e-05
    num_agent_steps_sampled: 1627000
    num_agent_steps_trained: 1627000
    num_steps_sampled: 1627000
    num_steps_trained: 1627000
  iterations_since_restore: 1627


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1627,40900.6,1627000,0,0,0,337.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1628000
  custom_metrics: {}
  date: 2021-10-09_09-46-41
  done: false
  episode_len_mean: 338.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4598
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8068461113505894
          entropy_coeff: 0.009999999999999998
          kl: 0.013050084137647942
          policy_loss: -0.06509373790274063
          total_loss: -0.07042511517388952
          vf_explained_var: -0.2923637926578522
          vf_loss: 2.3740334118984822e-05
    num_agent_steps_sampled: 1628000
    num_agent_steps_trained: 1628000
    num_steps_sampled: 1628000
    num_steps_trained: 1628000
  iterations_since_restore: 1628


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1628,40924.6,1628000,0,0,0,338.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1629000
  custom_metrics: {}
  date: 2021-10-09_09-47-11
  done: false
  episode_len_mean: 337.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4601
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.5860393312242296
          entropy_coeff: 0.009999999999999998
          kl: 0.010465808968937809
          policy_loss: -0.012944517998645703
          total_loss: -0.018568326781193415
          vf_explained_var: -0.4573894441127777
          vf_loss: 4.083327441751155e-05
    num_agent_steps_sampled: 1629000
    num_agent_steps_trained: 1629000
    num_steps_sampled: 1629000
    num_steps_trained: 1629000
  iterations_since_restore: 1629

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1629,40954.7,1629000,0,0,0,337.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1630000
  custom_metrics: {}
  date: 2021-10-09_09-47-36
  done: false
  episode_len_mean: 336.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4604
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.908271610736847
          entropy_coeff: 0.009999999999999998
          kl: 0.013275379105591078
          policy_loss: -0.06191945318132639
          total_loss: -0.06805209478156435
          vf_explained_var: 0.04656728729605675
          vf_loss: 1.7248080580935087e-05
    num_agent_steps_sampled: 1630000
    num_agent_steps_trained: 1630000
    num_steps_sampled: 1630000
    num_steps_trained: 1630000
  iterations_since_restore: 1630
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1630,40979.8,1630000,0,0,0,336.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1631000
  custom_metrics: {}
  date: 2021-10-09_09-48-02
  done: false
  episode_len_mean: 334.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4607
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7582089530097114
          entropy_coeff: 0.009999999999999998
          kl: 0.008677452562647165
          policy_loss: -0.015110086070166694
          total_loss: -0.024198267857233682
          vf_explained_var: -0.6378001570701599
          vf_loss: 4.036422015916388e-05
    num_agent_steps_sampled: 1631000
    num_agent_steps_trained: 1631000
    num_steps_sampled: 1631000
    num_steps_trained: 1631000
  iterations_since_restore: 1631

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1631,41005.4,1631000,0,0,0,334.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1632000
  custom_metrics: {}
  date: 2021-10-09_09-48-25
  done: false
  episode_len_mean: 337.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4610
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.728083974785275
          entropy_coeff: 0.009999999999999998
          kl: 0.0035958026137651584
          policy_loss: -0.027599964415033658
          total_loss: -0.040976963295704785
          vf_explained_var: -0.45058920979499817
          vf_loss: 0.00040082300792063406
    num_agent_steps_sampled: 1632000
    num_agent_steps_trained: 1632000
    num_steps_sampled: 1632000
    num_steps_trained: 1632000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1632,41028.2,1632000,0,0,0,337.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1633000
  custom_metrics: {}
  date: 2021-10-09_09-48-48
  done: false
  episode_len_mean: 338.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4613
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.8521989279323154
          entropy_coeff: 0.009999999999999998
          kl: 0.014604101476742128
          policy_loss: -0.06890926580462191
          total_loss: -0.08027569324605995
          vf_explained_var: -0.6584380865097046
          vf_loss: 4.193128859494916e-05
    num_agent_steps_sampled: 1633000
    num_agent_steps_trained: 1633000
    num_steps_sampled: 1633000
    num_steps_trained: 1633000
  iterations_since_restore: 1633


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1633,41051,1633000,0,0,0,338.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1634000
  custom_metrics: {}
  date: 2021-10-09_09-49-13
  done: false
  episode_len_mean: 338.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4616
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.7497457769181994
          entropy_coeff: 0.009999999999999998
          kl: 0.011435295605614234
          policy_loss: -0.05203808966196245
          total_loss: -0.06395191833790806
          vf_explained_var: -0.20082935690879822
          vf_loss: 1.3516690104451199e-05
    num_agent_steps_sampled: 1634000
    num_agent_steps_trained: 1634000
    num_steps_sampled: 1634000
    num_steps_trained: 1634000
  iterations_since_restore: 163

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1634,41076.4,1634000,0,0,0,338.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1635000
  custom_metrics: {}
  date: 2021-10-09_09-49-39
  done: false
  episode_len_mean: 337.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4619
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.711546912458208
          entropy_coeff: 0.009999999999999998
          kl: 0.013302397440752303
          policy_loss: -0.03639350069893731
          total_loss: -0.04695227241350545
          vf_explained_var: -0.6604686975479126
          vf_loss: 7.712261553428511e-05
    num_agent_steps_sampled: 1635000
    num_agent_steps_trained: 1635000
    num_steps_sampled: 1635000
    num_steps_trained: 1635000
  iterations_since_restore: 1635
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1635,41101.9,1635000,0,0,0,337.88




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1636000
  custom_metrics: {}
  date: 2021-10-09_09-50-20
  done: false
  episode_len_mean: 337.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4622
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.8024485866228739
          entropy_coeff: 0.009999999999999998
          kl: 0.02145506461585024
          policy_loss: -0.10162142980843783
          total_loss: -0.1091689569875598
          vf_explained_var: -0.3693069517612457
          vf_loss: 2.6236708587627316e-05
    num_agent_steps_sampled: 1636000
    num_agent_steps_trained: 1636000
    num_steps_sampled: 1636000
    num_steps_trained: 1636000
  iterations_since_restore: 1636
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1636,41143.5,1636000,0,0,0,337.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1637000
  custom_metrics: {}
  date: 2021-10-09_09-50-46
  done: false
  episode_len_mean: 337.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4625
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.7753185603353712
          entropy_coeff: 0.009999999999999998
          kl: 0.014564668044039426
          policy_loss: -0.10975139778521326
          total_loss: -0.11678970282276471
          vf_explained_var: -0.5252465605735779
          vf_loss: 7.324715986922254e-05
    num_agent_steps_sampled: 1637000
    num_agent_steps_trained: 1637000
    num_steps_sampled: 1637000
    num_steps_trained: 1637000
  iterations_since_restore: 1637
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1637,41168.7,1637000,0,0,0,337.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1638000
  custom_metrics: {}
  date: 2021-10-09_09-51-09
  done: false
  episode_len_mean: 337.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4627
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.847392245133718
          entropy_coeff: 0.009999999999999998
          kl: 0.012174678105456178
          policy_loss: -0.09793220224479834
          total_loss: -0.10748980620668994
          vf_explained_var: -0.6292926073074341
          vf_loss: 2.09259122028824e-05
    num_agent_steps_sampled: 1638000
    num_agent_steps_trained: 1638000
    num_steps_sampled: 1638000
    num_steps_trained: 1638000
  iterations_since_restore: 1638
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1638,41192.6,1638000,0,0,0,337.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1639000
  custom_metrics: {}
  date: 2021-10-09_09-51-32
  done: false
  episode_len_mean: 339.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4630
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9645510329140556
          entropy_coeff: 0.009999999999999998
          kl: 0.014326045000727679
          policy_loss: -0.12346386437614759
          total_loss: -0.13262260879079502
          vf_explained_var: -0.5584972500801086
          vf_loss: 1.948027973715701e-05
    num_agent_steps_sampled: 1639000
    num_agent_steps_trained: 1639000
    num_steps_sampled: 1639000
    num_steps_trained: 1639000
  iterations_since_restore: 1639
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1639,41215.1,1639000,0,0,0,339.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1640000
  custom_metrics: {}
  date: 2021-10-09_09-51-56
  done: false
  episode_len_mean: 339.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4633
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.771826860639784
          entropy_coeff: 0.009999999999999998
          kl: 0.017042427391795595
          policy_loss: -0.09079667776823044
          total_loss: -0.09599456919564141
          vf_explained_var: -0.2631814777851105
          vf_loss: 6.837573871760267e-05
    num_agent_steps_sampled: 1640000
    num_agent_steps_trained: 1640000
    num_steps_sampled: 1640000
    num_steps_trained: 1640000
  iterations_since_restore: 1640
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1640,41239.4,1640000,0,0,0,339.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1641000
  custom_metrics: {}
  date: 2021-10-09_09-52-20
  done: false
  episode_len_mean: 340.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4636
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9696552647484673
          entropy_coeff: 0.009999999999999998
          kl: 0.012636789633564167
          policy_loss: -0.0858522561378777
          total_loss: -0.0962951866702901
          vf_explained_var: -0.5565000176429749
          vf_loss: 2.0586727694333403e-05
    num_agent_steps_sampled: 1641000
    num_agent_steps_trained: 1641000
    num_steps_sampled: 1641000
    num_steps_trained: 1641000
  iterations_since_restore: 1641
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1641,41263,1641000,0,0,0,340.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1642000
  custom_metrics: {}
  date: 2021-10-09_09-52-44
  done: false
  episode_len_mean: 340.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4638
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.7991442018085055
          entropy_coeff: 0.009999999999999998
          kl: 0.010032674526115946
          policy_loss: 0.018128128887878524
          total_loss: 0.0074842625194125705
          vf_explained_var: -0.24537160992622375
          vf_loss: 1.7228839139230936e-05
    num_agent_steps_sampled: 1642000
    num_agent_steps_trained: 1642000
    num_steps_sampled: 1642000
    num_steps_trained: 1642000
  iterations_since_restore: 164

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1642,41287.1,1642000,0,0,0,340.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1643000
  custom_metrics: {}
  date: 2021-10-09_09-53-09
  done: false
  episode_len_mean: 340.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4641
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.7306442750824822
          entropy_coeff: 0.009999999999999998
          kl: 0.015953249405111197
          policy_loss: -0.025752990278932783
          total_loss: -0.03139078749550713
          vf_explained_var: 0.508331835269928
          vf_loss: 1.2445895916041789e-05
    num_agent_steps_sampled: 1643000
    num_agent_steps_trained: 1643000
    num_steps_sampled: 1643000
    num_steps_trained: 1643000
  iterations_since_restore: 1643
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1643,41312,1643000,0,0,0,340.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1644000
  custom_metrics: {}
  date: 2021-10-09_09-53-33
  done: false
  episode_len_mean: 341.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4644
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8139010495609706
          entropy_coeff: 0.009999999999999998
          kl: 0.011783758080286689
          policy_loss: -0.08893324207100603
          total_loss: -0.0984124223391215
          vf_explained_var: -0.2861323952674866
          vf_loss: 5.005944111770886e-05
    num_agent_steps_sampled: 1644000
    num_agent_steps_trained: 1644000
    num_steps_sampled: 1644000
    num_steps_trained: 1644000
  iterations_since_restore: 1644
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1644,41336.1,1644000,0,0,0,341.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1645000
  custom_metrics: {}
  date: 2021-10-09_09-53-55
  done: false
  episode_len_mean: 342.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4647
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.90129773484336
          entropy_coeff: 0.009999999999999998
          kl: 0.014525069887011613
          policy_loss: -0.14922619747618834
          total_loss: -0.15760684286554655
          vf_explained_var: -0.20566046237945557
          vf_loss: 1.9629487597841136e-05
    num_agent_steps_sampled: 1645000
    num_agent_steps_trained: 1645000
    num_steps_sampled: 1645000
    num_steps_trained: 1645000
  iterations_since_restore: 1645
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1645,41358.5,1645000,0,0,0,342.42




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1646000
  custom_metrics: {}
  date: 2021-10-09_09-54-37
  done: false
  episode_len_mean: 341.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4650
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.88125874598821
          entropy_coeff: 0.009999999999999998
          kl: 0.01329476451939517
          policy_loss: -0.08067765724327829
          total_loss: -0.08975947606894705
          vf_explained_var: -0.2107359766960144
          vf_loss: 1.6987029200916165e-05
    num_agent_steps_sampled: 1646000
    num_agent_steps_trained: 1646000
    num_steps_sampled: 1646000
    num_steps_trained: 1646000
  iterations_since_restore: 1646
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1646,41400.5,1646000,0,0,0,341.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1647000
  custom_metrics: {}
  date: 2021-10-09_09-54-59
  done: false
  episode_len_mean: 342.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4652
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.948363761107127
          entropy_coeff: 0.009999999999999998
          kl: 0.009916099159970966
          policy_loss: -0.046115462730328245
          total_loss: -0.058344209732280834
          vf_explained_var: -0.5834522247314453
          vf_loss: 9.72124767031346e-06
    num_agent_steps_sampled: 1647000
    num_agent_steps_trained: 1647000
    num_steps_sampled: 1647000
    num_steps_trained: 1647000
  iterations_since_restore: 1647
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1647,41422.3,1647000,0,0,0,342.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1648000
  custom_metrics: {}
  date: 2021-10-09_09-55-23
  done: false
  episode_len_mean: 344.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4655
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.6276781572235954
          entropy_coeff: 0.009999999999999998
          kl: 0.011131680551036678
          policy_loss: -0.11513851990716326
          total_loss: -0.12317750946515137
          vf_explained_var: -0.47117236256599426
          vf_loss: 0.00010446049409438274
    num_agent_steps_sampled: 1648000
    num_agent_steps_trained: 1648000
    num_steps_sampled: 1648000
    num_steps_trained: 1648000
  iterations_since_restore: 1648


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1648,41446.1,1648000,0,0,0,344.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1649000
  custom_metrics: {}
  date: 2021-10-09_09-55-45
  done: false
  episode_len_mean: 344.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4658
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9108371800846524
          entropy_coeff: 0.009999999999999998
          kl: 0.013175837705318867
          policy_loss: -0.1012096758104033
          total_loss: -0.11066130383147134
          vf_explained_var: -0.27846837043762207
          vf_loss: 2.9854974263798796e-05
    num_agent_steps_sampled: 1649000
    num_agent_steps_trained: 1649000
    num_steps_sampled: 1649000
    num_steps_trained: 1649000
  iterations_since_restore: 1649


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1649,41468.4,1649000,0,0,0,344.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1650000
  custom_metrics: {}
  date: 2021-10-09_09-56-09
  done: false
  episode_len_mean: 345.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4661
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9569608887036642
          entropy_coeff: 0.009999999999999998
          kl: 0.014440317636233118
          policy_loss: -0.08179736170503828
          total_loss: -0.09079985986981128
          vf_explained_var: -0.24870525300502777
          vf_loss: 1.6335223371141992e-05
    num_agent_steps_sampled: 1650000
    num_agent_steps_trained: 1650000
    num_steps_sampled: 1650000
    num_steps_trained: 1650000
  iterations_since_restore: 1650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1650,41492.3,1650000,0,0,0,345.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1651000
  custom_metrics: {}
  date: 2021-10-09_09-56-33
  done: false
  episode_len_mean: 344.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4664
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8896613306469388
          entropy_coeff: 0.009999999999999998
          kl: 0.01003700320584405
          policy_loss: -0.022549901985459857
          total_loss: -0.034097809261745875
          vf_explained_var: 0.04690143093466759
          vf_loss: 1.51963197367877e-05
    num_agent_steps_sampled: 1651000
    num_agent_steps_trained: 1651000
    num_steps_sampled: 1651000
    num_steps_trained: 1651000
  iterations_since_restore: 1651
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1651,41516.3,1651000,0,0,0,344.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1652000
  custom_metrics: {}
  date: 2021-10-09_09-57-01
  done: false
  episode_len_mean: 342.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4667
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8171887463993497
          entropy_coeff: 0.009999999999999998
          kl: 0.009170246722894662
          policy_loss: -0.0175626863208082
          total_loss: -0.0289366585512956
          vf_explained_var: -0.8140705823898315
          vf_loss: 9.769841780224751e-05
    num_agent_steps_sampled: 1652000
    num_agent_steps_trained: 1652000
    num_steps_sampled: 1652000
    num_steps_trained: 1652000
  iterations_since_restore: 1652
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1652,41544.1,1652000,0,0,0,342.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1653000
  custom_metrics: {}
  date: 2021-10-09_09-57-26
  done: false
  episode_len_mean: 342.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4670
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8646779325273302
          entropy_coeff: 0.009999999999999998
          kl: 0.01747894160514748
          policy_loss: -0.08565072864294052
          total_loss: -0.09150046731034915
          vf_explained_var: -0.13414105772972107
          vf_loss: 2.6100851866633295e-05
    num_agent_steps_sampled: 1653000
    num_agent_steps_trained: 1653000
    num_steps_sampled: 1653000
    num_steps_trained: 1653000
  iterations_since_restore: 1653


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1653,41569.1,1653000,0,0,0,342.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1654000
  custom_metrics: {}
  date: 2021-10-09_09-57-55
  done: false
  episode_len_mean: 340.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 4674
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.5016061491436428
          entropy_coeff: 0.009999999999999998
          kl: 0.007224766427098572
          policy_loss: -0.10507671286662419
          total_loss: -0.11405961993667814
          vf_explained_var: -0.04308407008647919
          vf_loss: 0.000754402287081272
    num_agent_steps_sampled: 1654000
    num_agent_steps_trained: 1654000
    num_steps_sampled: 1654000
    num_steps_trained: 1654000
  iterations_since_restore: 1654
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1654,41597.5,1654000,0,0,0,340.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1655000
  custom_metrics: {}
  date: 2021-10-09_09-58-19
  done: false
  episode_len_mean: 340.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4677
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9019769363933139
          entropy_coeff: 0.009999999999999998
          kl: 0.01279557674770015
          policy_loss: -0.1545706522754497
          total_loss: -0.16418448019151885
          vf_explained_var: 0.04502426087856293
          vf_loss: 5.68876469262452e-05
    num_agent_steps_sampled: 1655000
    num_agent_steps_trained: 1655000
    num_steps_sampled: 1655000
    num_steps_trained: 1655000
  iterations_since_restore: 1655
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1655,41621.8,1655000,0,0,0,340.69




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1656000
  custom_metrics: {}
  date: 2021-10-09_09-59-01
  done: false
  episode_len_mean: 340.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4680
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8780061721801757
          entropy_coeff: 0.009999999999999998
          kl: 0.013246554121144605
          policy_loss: -0.07735295099102789
          total_loss: -0.0864236664544377
          vf_explained_var: -0.37488096952438354
          vf_loss: 3.078601652709444e-05
    num_agent_steps_sampled: 1656000
    num_agent_steps_trained: 1656000
    num_steps_sampled: 1656000
    num_steps_trained: 1656000
  iterations_since_restore: 1656
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1656,41663.8,1656000,0,0,0,340.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1657000
  custom_metrics: {}
  date: 2021-10-09_09-59-26
  done: false
  episode_len_mean: 340.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4683
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8859107004271614
          entropy_coeff: 0.009999999999999998
          kl: 0.013753226185540483
          policy_loss: 0.0017472359869215223
          total_loss: 0.09690251731210285
          vf_explained_var: -0.447949081659317
          vf_loss: 0.10396563749042495
    num_agent_steps_sampled: 1657000
    num_agent_steps_trained: 1657000
    num_steps_sampled: 1657000
    num_steps_trained: 1657000
  iterations_since_restore: 1657
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1657,41688.4,1657000,-0.07,0,-7,340.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1658000
  custom_metrics: {}
  date: 2021-10-09_09-59-50
  done: false
  episode_len_mean: 341.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4686
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8443311558829414
          entropy_coeff: 0.009999999999999998
          kl: 0.01588855263922599
          policy_loss: 0.008486795425415038
          total_loss: 0.007638503693872028
          vf_explained_var: 0.5406290888786316
          vf_loss: 0.00598609323044204
    num_agent_steps_sampled: 1658000
    num_agent_steps_trained: 1658000
    num_steps_sampled: 1658000
    num_steps_trained: 1658000
  iterations_since_restore: 1658
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1658,41713,1658000,-0.07,0,-7,341.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1659000
  custom_metrics: {}
  date: 2021-10-09_10-00-16
  done: false
  episode_len_mean: 340.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4689
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8487733258141412
          entropy_coeff: 0.009999999999999998
          kl: 0.01381239139261786
          policy_loss: 0.03757880793677436
          total_loss: 0.034330564944280516
          vf_explained_var: 0.35896003246307373
          vf_loss: 0.005147505618838801
    num_agent_steps_sampled: 1659000
    num_agent_steps_trained: 1659000
    num_steps_sampled: 1659000
    num_steps_trained: 1659000
  iterations_since_restore: 1659
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1659,41738.7,1659000,-0.07,0,-7,340.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1660000
  custom_metrics: {}
  date: 2021-10-09_10-00-39
  done: false
  episode_len_mean: 341.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4691
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.769626388284895
          entropy_coeff: 0.009999999999999998
          kl: 0.013957728464877897
          policy_loss: -0.012902182713150977
          total_loss: -0.016860017532275784
          vf_explained_var: -0.21457871794700623
          vf_loss: 0.00354025373623396
    num_agent_steps_sampled: 1660000
    num_agent_steps_trained: 1660000
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
  iterations_since_restore: 166

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1660,41761.7,1660000,-0.07,0,-7,341.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1661000
  custom_metrics: {}
  date: 2021-10-09_10-01-02
  done: false
  episode_len_mean: 341.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4694
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 2.0258600420422024
          entropy_coeff: 0.009999999999999998
          kl: 0.014153403005836295
          policy_loss: -0.10986575960285133
          total_loss: -0.1166189720440242
          vf_explained_var: -0.518132209777832
          vf_loss: 0.0031642398796975614
    num_agent_steps_sampled: 1661000
    num_agent_steps_trained: 1661000
    num_steps_sampled: 1661000
    num_steps_trained: 1661000
  iterations_since_restore: 1661


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1661,41784.7,1661000,-0.07,0,-7,341.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1662000
  custom_metrics: {}
  date: 2021-10-09_10-01-26
  done: false
  episode_len_mean: 343.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4697
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9893048855993483
          entropy_coeff: 0.009999999999999998
          kl: 0.012912855831376645
          policy_loss: -0.09410608841313256
          total_loss: -0.10293947089877394
          vf_explained_var: -0.3215065896511078
          vf_loss: 0.0016249254242413575
    num_agent_steps_sampled: 1662000
    num_agent_steps_trained: 1662000
    num_steps_sampled: 1662000
    num_steps_trained: 1662000
  iterations_since_restore: 1662

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1662,41808.6,1662000,-0.07,0,-7,343


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1663000
  custom_metrics: {}
  date: 2021-10-09_10-01-50
  done: false
  episode_len_mean: 344.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4700
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 2.009182131290436
          entropy_coeff: 0.009999999999999998
          kl: 0.01403585801906181
          policy_loss: -0.08455141027354532
          total_loss: -0.09299713460107645
          vf_explained_var: -0.9559390544891357
          vf_loss: 0.0013908376968983147
    num_agent_steps_sampled: 1663000
    num_agent_steps_trained: 1663000
    num_steps_sampled: 1663000
    num_steps_trained: 1663000
  iterations_since_restore: 1663


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1663,41832.4,1663000,-0.07,0,-7,344.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1664000
  custom_metrics: {}
  date: 2021-10-09_10-02-12
  done: false
  episode_len_mean: 346.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4702
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 2.049185715781318
          entropy_coeff: 0.009999999999999998
          kl: 0.012832063709020345
          policy_loss: -0.11683347908159097
          total_loss: -0.1265608928890692
          vf_explained_var: -0.9831722974777222
          vf_loss: 0.0013887339051709407
    num_agent_steps_sampled: 1664000
    num_agent_steps_trained: 1664000
    num_steps_sampled: 1664000
    num_steps_trained: 1664000
  iterations_since_restore: 1664


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1664,41854.7,1664000,-0.07,0,-7,346.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1665000
  custom_metrics: {}
  date: 2021-10-09_10-02-35
  done: false
  episode_len_mean: 348.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4705
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9990228136380515
          entropy_coeff: 0.009999999999999998
          kl: 0.012104491835637304
          policy_loss: -0.04311014051652617
          total_loss: -0.053078764614959556
          vf_explained_var: -1.0
          vf_loss: 0.0011774897271405078
    num_agent_steps_sampled: 1665000
    num_agent_steps_trained: 1665000
    num_steps_sampled: 1665000
    num_steps_trained: 1665000
  iterations_since_restore: 1665
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1665,41877.4,1665000,-0.07,0,-7,348.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1666000
  custom_metrics: {}
  date: 2021-10-09_10-03-00
  done: false
  episode_len_mean: 348.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4708
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8318237755033704
          entropy_coeff: 0.009999999999999998
          kl: 0.019357990408890538
          policy_loss: -0.0762706039680375
          total_loss: -0.07968537285923957
          vf_explained_var: -0.354109525680542
          vf_loss: 0.0007596040564951383
    num_agent_steps_sampled: 1666000
    num_agent_steps_trained: 1666000
    num_steps_sampled: 1666000
    num_steps_trained: 1666000
  iterations_since_restore: 1666


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1666,41903,1666000,-0.07,0,-7,348.21




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1667000
  custom_metrics: {}
  date: 2021-10-09_10-03-40
  done: false
  episode_len_mean: 347.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4711
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9901662826538087
          entropy_coeff: 0.009999999999999998
          kl: 0.013724623564766109
          policy_loss: 0.041134627266890475
          total_loss: 0.032428324905534586
          vf_explained_var: -0.6085434556007385
          vf_loss: 0.0011675024469796982
    num_agent_steps_sampled: 1667000
    num_agent_steps_trained: 1667000
    num_steps_sampled: 1667000
    num_steps_trained: 1667000
  iterations_since_restore: 166

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1667,41942.5,1667000,-0.07,0,-7,347.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1668000
  custom_metrics: {}
  date: 2021-10-09_10-04-02
  done: false
  episode_len_mean: 348.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4713
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.8267397668626573
          entropy_coeff: 0.009999999999999998
          kl: 0.015200204847886025
          policy_loss: -0.060528706593645946
          total_loss: -0.06688967131906086
          vf_explained_var: -0.020717745646834373
          vf_loss: 0.0008004470880324435
    num_agent_steps_sampled: 1668000
    num_agent_steps_trained: 1668000
    num_steps_sampled: 1668000
    num_steps_trained: 1668000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1668,41964.3,1668000,-0.07,0,-7,348.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1669000
  custom_metrics: {}
  date: 2021-10-09_10-04-25
  done: false
  episode_len_mean: 349.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4716
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 2.0264175401793585
          entropy_coeff: 0.009999999999999998
          kl: 0.01117679202310337
          policy_loss: -0.03762617450621393
          total_loss: -0.04868099763989449
          vf_explained_var: -0.5609019994735718
          vf_loss: 0.001043063037084519
    num_agent_steps_sampled: 1669000
    num_agent_steps_trained: 1669000
    num_steps_sampled: 1669000
    num_steps_trained: 1669000
  iterations_since_restore: 1669


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1669,41987.3,1669000,-0.07,0,-7,349.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1670000
  custom_metrics: {}
  date: 2021-10-09_10-04-48
  done: false
  episode_len_mean: 350.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4719
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 2.0129660500420465
          entropy_coeff: 0.009999999999999998
          kl: 0.013279651967652524
          policy_loss: -0.11136061193214522
          total_loss: -0.12134912252012227
          vf_explained_var: -0.7895849347114563
          vf_loss: 0.00043840949591766626
    num_agent_steps_sampled: 1670000
    num_agent_steps_trained: 1670000
    num_steps_sampled: 1670000
    num_steps_trained: 1670000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1670,42010.1,1670000,-0.07,0,-7,350.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1671000
  custom_metrics: {}
  date: 2021-10-09_10-05-11
  done: false
  episode_len_mean: 350.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 4721
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 2.00547432369656
          entropy_coeff: 0.009999999999999998
          kl: 0.012335862277933292
          policy_loss: -0.18038269428329334
          total_loss: -0.1910895149740908
          vf_explained_var: -0.7882957458496094
          vf_loss: 0.00033476165990578013
    num_agent_steps_sampled: 1671000
    num_agent_steps_trained: 1671000
    num_steps_sampled: 1671000
    num_steps_trained: 1671000
  iterations_since_restore: 1671


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1671,42033.3,1671000,-0.07,0,-7,350.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1672000
  custom_metrics: {}
  date: 2021-10-09_10-05-33
  done: false
  episode_len_mean: 351.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4724
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 2.0274309462971156
          entropy_coeff: 0.009999999999999998
          kl: 0.011461829960892306
          policy_loss: -0.0963477198034525
          total_loss: -0.10768885691132811
          vf_explained_var: -0.911211371421814
          vf_loss: 0.0005586157221437639
    num_agent_steps_sampled: 1672000
    num_agent_steps_trained: 1672000
    num_steps_sampled: 1672000
    num_steps_trained: 1672000
  iterations_since_restore: 1672


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1672,42055.7,1672000,-0.07,0,-7,351.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1673000
  custom_metrics: {}
  date: 2021-10-09_10-06-00
  done: false
  episode_len_mean: 351.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4727
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.5865123311678568
          entropy_coeff: 0.009999999999999998
          kl: 0.01672195747065687
          policy_loss: -0.12124090418219566
          total_loss: -0.1245118722319603
          vf_explained_var: -0.42175647616386414
          vf_loss: 0.00037630293429376455
    num_agent_steps_sampled: 1673000
    num_agent_steps_trained: 1673000
    num_steps_sampled: 1673000
    num_steps_trained: 1673000
  iterations_since_restore: 167

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1673,42082.7,1673000,-0.07,0,-7,351.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1674000
  custom_metrics: {}
  date: 2021-10-09_10-06-23
  done: false
  episode_len_mean: 351.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4730
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.842638333638509
          entropy_coeff: 0.009999999999999998
          kl: 0.016206499917706868
          policy_loss: -0.0919060141676002
          total_loss: -0.09790540523827077
          vf_explained_var: -0.4799298644065857
          vf_loss: 0.0005857588863970401
    num_agent_steps_sampled: 1674000
    num_agent_steps_trained: 1674000
    num_steps_sampled: 1674000
    num_steps_trained: 1674000
  iterations_since_restore: 1674


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1674,42105,1674000,-0.07,0,-7,351.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1675000
  custom_metrics: {}
  date: 2021-10-09_10-06-47
  done: false
  episode_len_mean: 350.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 4733
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.6580773168139988
          entropy_coeff: 0.009999999999999998
          kl: 0.011046705307337569
          policy_loss: -0.10802370529207918
          total_loss: -0.11613233923498127
          vf_explained_var: -0.6194794774055481
          vf_loss: 0.00040089784565174746
    num_agent_steps_sampled: 1675000
    num_agent_steps_trained: 1675000
    num_steps_sampled: 1675000
    num_steps_trained: 1675000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1675,42129.5,1675000,-0.07,0,-7,350.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1676000
  custom_metrics: {}
  date: 2021-10-09_10-07-10
  done: false
  episode_len_mean: 351.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4735
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.6871966110335457
          entropy_coeff: 0.009999999999999998
          kl: 0.023955426152114236
          policy_loss: 0.02745628779133161
          total_loss: 0.20343298473291926
          vf_explained_var: 0.08917838335037231
          vf_loss: 0.17534569534731823
    num_agent_steps_sampled: 1676000
    num_agent_steps_trained: 1676000
    num_steps_sampled: 1676000
    num_steps_trained: 1676000
  iterations_since_restore: 1676
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1676,42152.1,1676000,-0.21,0,-14,351.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1677000
  custom_metrics: {}
  date: 2021-10-09_10-07-32
  done: false
  episode_len_mean: 352.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4738
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8864791591962178
          entropy_coeff: 0.009999999999999998
          kl: 0.012307531802188975
          policy_loss: -0.0684533649434646
          total_loss: -0.06431589755747054
          vf_explained_var: -0.5705758333206177
          vf_loss: 0.009513560625621014
    num_agent_steps_sampled: 1677000
    num_agent_steps_trained: 1677000
    num_steps_sampled: 1677000
    num_steps_trained: 1677000
  iterations_since_restore: 1677

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1677,42174.7,1677000,-0.21,0,-14,352.55




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1678000
  custom_metrics: {}
  date: 2021-10-09_10-08-12
  done: false
  episode_len_mean: 353.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4740
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8473860833379958
          entropy_coeff: 0.009999999999999998
          kl: 0.010501247642521803
          policy_loss: -0.13388609314958255
          total_loss: -0.13517149810989698
          vf_explained_var: -0.09229953587055206
          vf_loss: 0.005679393101794024
    num_agent_steps_sampled: 1678000
    num_agent_steps_trained: 1678000
    num_steps_sampled: 1678000
    num_steps_trained: 1678000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1678,42214.8,1678000,-0.21,0,-14,353.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1679000
  custom_metrics: {}
  date: 2021-10-09_10-08-35
  done: false
  episode_len_mean: 354.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4743
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8398162987497118
          entropy_coeff: 0.009999999999999998
          kl: 0.010083902211516754
          policy_loss: -0.07932107460995515
          total_loss: -0.080642409539885
          vf_explained_var: -0.7392829060554504
          vf_loss: 0.006025163185161849
    num_agent_steps_sampled: 1679000
    num_agent_steps_trained: 1679000
    num_steps_sampled: 1679000
    num_steps_trained: 1679000
  iterations_since_restore: 1679
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1679,42237.3,1679000,-0.21,0,-14,354.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1680000
  custom_metrics: {}
  date: 2021-10-09_10-09-00
  done: false
  episode_len_mean: 352.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4746
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8199182245466443
          entropy_coeff: 0.009999999999999998
          kl: 0.009077228906347538
          policy_loss: -0.07402547877281904
          total_loss: -0.06582848448306322
          vf_explained_var: -0.14475172758102417
          vf_loss: 0.016447797160233476
    num_agent_steps_sampled: 1680000
    num_agent_steps_trained: 1680000
    num_steps_sampled: 1680000
    num_steps_trained: 1680000
  iterations_since_restore: 168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1680,42262.7,1680000,-0.21,0,-14,352.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1681000
  custom_metrics: {}
  date: 2021-10-09_10-09-25
  done: false
  episode_len_mean: 351.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4749
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8654632435904608
          entropy_coeff: 0.009999999999999998
          kl: 0.012076205379109004
          policy_loss: -0.07982374644941753
          total_loss: -0.0833743936692675
          vf_explained_var: -0.40662214159965515
          vf_loss: 0.0018688164793679283
    num_agent_steps_sampled: 1681000
    num_agent_steps_trained: 1681000
    num_steps_sampled: 1681000
    num_steps_trained: 1681000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1681,42287.7,1681000,-0.21,0,-14,351.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1682000
  custom_metrics: {}
  date: 2021-10-09_10-09-49
  done: false
  episode_len_mean: 351.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4752
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.7893372350268895
          entropy_coeff: 0.009999999999999998
          kl: 0.010609960156878866
          policy_loss: -0.04969838319553269
          total_loss: -0.0539094110743867
          vf_explained_var: -0.48068591952323914
          vf_loss: 0.0020541358027710683
    num_agent_steps_sampled: 1682000
    num_agent_steps_trained: 1682000
    num_steps_sampled: 1682000
    num_steps_trained: 1682000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1682,42311.4,1682000,-0.21,0,-14,351.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1683000
  custom_metrics: {}
  date: 2021-10-09_10-10-12
  done: false
  episode_len_mean: 351.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4755
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8740677568647597
          entropy_coeff: 0.009999999999999998
          kl: 0.0120417419936677
          policy_loss: -0.0938508259339465
          total_loss: -0.09777687937021255
          vf_explained_var: -0.7894695997238159
          vf_loss: 0.0016172235087853753
    num_agent_steps_sampled: 1683000
    num_agent_steps_trained: 1683000
    num_steps_sampled: 1683000
    num_steps_trained: 1683000
  iterations_since_restore: 1683


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1683,42333.9,1683000,-0.21,0,-14,351.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1684000
  custom_metrics: {}
  date: 2021-10-09_10-10-37
  done: false
  episode_len_mean: 350.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4758
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.841665522257487
          entropy_coeff: 0.009999999999999998
          kl: 0.010618182659611038
          policy_loss: -0.07406557576937808
          total_loss: -0.0783848940498299
          vf_explained_var: -0.2366626262664795
          vf_loss: 0.002460119576426223
    num_agent_steps_sampled: 1684000
    num_agent_steps_trained: 1684000
    num_steps_sampled: 1684000
    num_steps_trained: 1684000
  iterations_since_restore: 1684


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1684,42359.6,1684000,-0.21,0,-14,350.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1685000
  custom_metrics: {}
  date: 2021-10-09_10-10-58
  done: false
  episode_len_mean: 351.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4760
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.904815767871009
          entropy_coeff: 0.009999999999999998
          kl: 0.012422239293146979
          policy_loss: -0.13204293590452937
          total_loss: -0.13600768455200726
          vf_explained_var: -0.8558006882667542
          vf_loss: 0.0014689977455418558
    num_agent_steps_sampled: 1685000
    num_agent_steps_trained: 1685000
    num_steps_sampled: 1685000
    num_steps_trained: 1685000
  iterations_since_restore: 168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1685,42380.4,1685000,-0.21,0,-14,351.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1686000
  custom_metrics: {}
  date: 2021-10-09_10-11-19
  done: false
  episode_len_mean: 353.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4763
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.9438549200693767
          entropy_coeff: 0.009999999999999998
          kl: 0.010639936571483549
          policy_loss: -0.08252787594166067
          total_loss: -0.08890880855421225
          vf_explained_var: -0.993480920791626
          vf_loss: 0.0013965538961605893
    num_agent_steps_sampled: 1686000
    num_agent_steps_trained: 1686000
    num_steps_sampled: 1686000
    num_steps_trained: 1686000
  iterations_since_restore: 168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1686,42401.5,1686000,-0.21,0,-14,353.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1687000
  custom_metrics: {}
  date: 2021-10-09_10-11-44
  done: false
  episode_len_mean: 353.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4765
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8221010618739657
          entropy_coeff: 0.009999999999999998
          kl: 0.008898266151375367
          policy_loss: -0.07049808920257622
          total_loss: -0.07829116926425034
          vf_explained_var: -0.5836591720581055
          vf_loss: 0.0006756868119636136
    num_agent_steps_sampled: 1687000
    num_agent_steps_trained: 1687000
    num_steps_sampled: 1687000
    num_steps_trained: 1687000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1687,42426.2,1687000,-0.21,0,-14,353.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1688000
  custom_metrics: {}
  date: 2021-10-09_10-12-07
  done: false
  episode_len_mean: 357.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4768
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.9376740442381966
          entropy_coeff: 0.009999999999999998
          kl: 0.011453417869892589
          policy_loss: -0.0798016005092197
          total_loss: -0.08563622501161364
          vf_explained_var: -0.8391814231872559
          vf_loss: 0.0009895027369364268
    num_agent_steps_sampled: 1688000
    num_agent_steps_trained: 1688000
    num_steps_sampled: 1688000
    num_steps_trained: 1688000
  iterations_since_restore: 168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1688,42448.8,1688000,-0.21,0,-14,357.06




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1689000
  custom_metrics: {}
  date: 2021-10-09_10-12-49
  done: false
  episode_len_mean: 356.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4771
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.6799335771136814
          entropy_coeff: 0.009999999999999998
          kl: 0.009272847439333553
          policy_loss: -0.10755236055701971
          total_loss: -0.11345132994982932
          vf_explained_var: -0.9070253968238831
          vf_loss: 0.0007375947691293227
    num_agent_steps_sampled: 1689000
    num_agent_steps_trained: 1689000
    num_steps_sampled: 1689000
    num_steps_trained: 1689000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1689,42491.1,1689000,-0.21,0,-14,356.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1690000
  custom_metrics: {}
  date: 2021-10-09_10-13-11
  done: false
  episode_len_mean: 359.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.23
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4773
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.67852351003223
          entropy_coeff: 0.009999999999999998
          kl: 0.007487307587189207
          policy_loss: -0.037190099557240806
          total_loss: 0.003379009746842914
          vf_explained_var: 0.0694134384393692
          vf_loss: 0.04914847780148395
    num_agent_steps_sampled: 1690000
    num_agent_steps_trained: 1690000
    num_steps_sampled: 1690000
    num_steps_trained: 1690000
  iterations_since_restore: 1690
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1690,42512.7,1690000,-0.23,0,-14,359.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1691000
  custom_metrics: {}
  date: 2021-10-09_10-13-33
  done: false
  episode_len_mean: 361.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.23
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4776
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.860026478767395
          entropy_coeff: 0.009999999999999998
          kl: 0.013169934013645858
          policy_loss: -0.08830753817326492
          total_loss: -0.08839506299959289
          vf_explained_var: -0.03901953622698784
          vf_loss: 0.004078877049808701
    num_agent_steps_sampled: 1691000
    num_agent_steps_trained: 1691000
    num_steps_sampled: 1691000
    num_steps_trained: 1691000
  iterations_since_restore: 169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1691,42534.7,1691000,-0.23,0,-14,361.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1692000
  custom_metrics: {}
  date: 2021-10-09_10-13-52
  done: false
  episode_len_mean: 363.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.23
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4778
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.9094970319006177
          entropy_coeff: 0.009999999999999998
          kl: 0.011747990354239117
          policy_loss: -0.08289307492474715
          total_loss: -0.08758855691800514
          vf_explained_var: -0.994862973690033
          vf_loss: 0.001524036135783212
    num_agent_steps_sampled: 1692000
    num_agent_steps_trained: 1692000
    num_steps_sampled: 1692000
    num_steps_trained: 1692000
  iterations_since_restore: 1692

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1692,42554.5,1692000,-0.23,0,-14,363.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1693000
  custom_metrics: {}
  date: 2021-10-09_10-14-17
  done: false
  episode_len_mean: 365.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.23
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4781
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.733178781138526
          entropy_coeff: 0.009999999999999998
          kl: 0.011061469228709934
          policy_loss: -0.12850718926638366
          total_loss: -0.13128574173897506
          vf_explained_var: -0.23733434081077576
          vf_loss: 0.0024301846666882434
    num_agent_steps_sampled: 1693000
    num_agent_steps_trained: 1693000
    num_steps_sampled: 1693000
    num_steps_trained: 1693000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1693,42579,1693000,-0.23,0,-14,365.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1694000
  custom_metrics: {}
  date: 2021-10-09_10-14-40
  done: false
  episode_len_mean: 364.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4784
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8628663261731466
          entropy_coeff: 0.009999999999999998
          kl: 0.011411867075901149
          policy_loss: -0.07672052971190876
          total_loss: -0.08159874851504961
          vf_explained_var: -0.5107695460319519
          vf_loss: 0.0012433750600191868
    num_agent_steps_sampled: 1694000
    num_agent_steps_trained: 1694000
    num_steps_sampled: 1694000
    num_steps_trained: 1694000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1694,42602,1694000,-0.16,0,-14,364.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1695000
  custom_metrics: {}
  date: 2021-10-09_10-15-04
  done: false
  episode_len_mean: 365.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4787
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.780976422627767
          entropy_coeff: 0.009999999999999998
          kl: 0.015833075948731987
          policy_loss: -0.08546202174491352
          total_loss: -0.08499803692102433
          vf_explained_var: -0.5090094804763794
          vf_loss: 0.0009211572647068857
    num_agent_steps_sampled: 1695000
    num_agent_steps_trained: 1695000
    num_steps_sampled: 1695000
    num_steps_trained: 1695000
  iterations_since_restore: 169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1695,42626.2,1695000,-0.16,0,-14,365.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1696000
  custom_metrics: {}
  date: 2021-10-09_10-15-28
  done: false
  episode_len_mean: 366.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4790
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8265805959701538
          entropy_coeff: 0.009999999999999998
          kl: 0.010716893247319071
          policy_loss: -0.09338392476654715
          total_loss: -0.09870917027195295
          vf_explained_var: -0.7816170454025269
          vf_loss: 0.0011951564765897477
    num_agent_steps_sampled: 1696000
    num_agent_steps_trained: 1696000
    num_steps_sampled: 1696000
    num_steps_trained: 1696000
  iterations_since_restore: 169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1696,42649.7,1696000,-0.16,0,-14,366.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1697000
  custom_metrics: {}
  date: 2021-10-09_10-15-53
  done: false
  episode_len_mean: 365.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4793
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8635114444626701
          entropy_coeff: 0.009999999999999998
          kl: 0.008173297714466551
          policy_loss: -0.08293514377954933
          total_loss: -0.0909602253490852
          vf_explained_var: -0.4733033776283264
          vf_loss: 0.0016523359805837067
    num_agent_steps_sampled: 1697000
    num_agent_steps_trained: 1697000
    num_steps_sampled: 1697000
    num_steps_trained: 1697000
  iterations_since_restore: 169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1697,42674.5,1697000,-0.16,0,-14,365.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1698000
  custom_metrics: {}
  date: 2021-10-09_10-16-16
  done: false
  episode_len_mean: 365.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4795
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.7154118127293057
          entropy_coeff: 0.009999999999999998
          kl: 0.011788798945604997
          policy_loss: -0.06729745904190673
          total_loss: -0.07006858299589819
          vf_explained_var: -0.1528542935848236
          vf_loss: 0.0014628121998005858
    num_agent_steps_sampled: 1698000
    num_agent_steps_trained: 1698000
    num_steps_sampled: 1698000
    num_steps_trained: 1698000
  iterations_since_restore: 169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1698,42698,1698000,-0.16,0,-14,365.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1699000
  custom_metrics: {}
  date: 2021-10-09_10-16-38
  done: false
  episode_len_mean: 365.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4798
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8853370242648655
          entropy_coeff: 0.009999999999999998
          kl: 0.011735554655306915
          policy_loss: -0.06598552535805437
          total_loss: -0.07142029472937186
          vf_explained_var: -0.7341687679290771
          vf_loss: 0.0005567739235832253
    num_agent_steps_sampled: 1699000
    num_agent_steps_trained: 1699000
    num_steps_sampled: 1699000
    num_steps_trained: 1699000
  iterations_since_restore: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1699,42720.2,1699000,-0.16,0,-14,365.74




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1700000
  custom_metrics: {}
  date: 2021-10-09_10-17-17
  done: false
  episode_len_mean: 366.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4801
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.9148453209135268
          entropy_coeff: 0.009999999999999998
          kl: 0.010263008010004487
          policy_loss: -0.08088948970867528
          total_loss: -0.08781272576500972
          vf_explained_var: -1.0
          vf_loss: 0.0009772611811058596
    num_agent_steps_sampled: 1700000
    num_agent_steps_trained: 1700000
    num_steps_sampled: 1700000
    num_steps_trained: 1700000
  iterations_since_restore: 1700
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1700,42758.7,1700000,-0.16,0,-14,366.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1701000
  custom_metrics: {}
  date: 2021-10-09_10-17-40
  done: false
  episode_len_mean: 365.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4803
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.818569102552202
          entropy_coeff: 0.009999999999999998
          kl: 0.012287395679743558
          policy_loss: -0.1035867940220568
          total_loss: -0.10797703555888599
          vf_explained_var: -0.6883137822151184
          vf_loss: 0.00032882073137443515
    num_agent_steps_sampled: 1701000
    num_agent_steps_trained: 1701000
    num_steps_sampled: 1701000
    num_steps_trained: 1701000
  iterations_since_restore: 170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1701,42781.7,1701000,-0.16,0,-14,365.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1702000
  custom_metrics: {}
  date: 2021-10-09_10-18-01
  done: false
  episode_len_mean: 367.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4806
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.946735021803114
          entropy_coeff: 0.009999999999999998
          kl: 0.009772310766359333
          policy_loss: -0.07467730597903331
          total_loss: -0.08309123087674379
          vf_explained_var: -0.9993163347244263
          vf_loss: 0.00034325628593150114
    num_agent_steps_sampled: 1702000
    num_agent_steps_trained: 1702000
    num_steps_sampled: 1702000
    num_steps_trained: 1702000
  iterations_since_restore: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1702,42803,1702000,-0.16,0,-14,367.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1703000
  custom_metrics: {}
  date: 2021-10-09_10-18-24
  done: false
  episode_len_mean: 367.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4809
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.8125137209892273
          entropy_coeff: 0.009999999999999998
          kl: 0.012325264217967874
          policy_loss: -0.10501440043250719
          total_loss: -0.10932773268885082
          vf_explained_var: -0.20887687802314758
          vf_loss: 0.0003036756376483633
    num_agent_steps_sampled: 1703000
    num_agent_steps_trained: 1703000
    num_steps_sampled: 1703000
    num_steps_trained: 1703000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1703,42825.7,1703000,-0.16,0,-14,367.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1704000
  custom_metrics: {}
  date: 2021-10-09_10-18-48
  done: false
  episode_len_mean: 367.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4811
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.7351464629173279
          entropy_coeff: 0.009999999999999998
          kl: 0.012841598317197228
          policy_loss: -0.10166578179018365
          total_loss: -0.10463326438847516
          vf_explained_var: -0.5448459386825562
          vf_loss: 0.00030996335731793404
    num_agent_steps_sampled: 1704000
    num_agent_steps_trained: 1704000
    num_steps_sampled: 1704000
    num_steps_trained: 1704000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1704,42849.8,1704000,-0.16,0,-14,367.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1705000
  custom_metrics: {}
  date: 2021-10-09_10-19-12
  done: false
  episode_len_mean: 366.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4814
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.9721551683213976
          entropy_coeff: 0.009999999999999998
          kl: 0.01006477885373851
          policy_loss: -0.04226257267097632
          total_loss: -0.05071481188966168
          vf_explained_var: -0.7330605387687683
          vf_loss: 0.00023860459388945149
    num_agent_steps_sampled: 1705000
    num_agent_steps_trained: 1705000
    num_steps_sampled: 1705000
    num_steps_trained: 1705000
  iterations_since_restore: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1705,42873.4,1705000,-0.16,0,-14,366.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1706000
  custom_metrics: {}
  date: 2021-10-09_10-19-36
  done: false
  episode_len_mean: 366.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4817
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.645988314681583
          entropy_coeff: 0.009999999999999998
          kl: 0.01068778278163636
          policy_loss: -0.044914344160093204
          total_loss: -0.04938728287816048
          vf_explained_var: -0.49230122566223145
          vf_loss: 0.00027344409302006373
    num_agent_steps_sampled: 1706000
    num_agent_steps_trained: 1706000
    num_steps_sampled: 1706000
    num_steps_trained: 1706000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1706,42898.2,1706000,-0.16,0,-14,366.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1707000
  custom_metrics: {}
  date: 2021-10-09_10-19-59
  done: false
  episode_len_mean: 366.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4820
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 1.7728324797418382
          entropy_coeff: 0.009999999999999998
          kl: 0.02086407435853622
          policy_loss: 0.06841422845092085
          total_loss: 0.0737448694391383
          vf_explained_var: -0.15570561587810516
          vf_loss: 0.0001925468672804224
    num_agent_steps_sampled: 1707000
    num_agent_steps_trained: 1707000
    num_steps_sampled: 1707000
    num_steps_trained: 1707000
  iterations_since_restore: 1707


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1707,42921.1,1707000,-0.16,0,-14,366.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1708000
  custom_metrics: {}
  date: 2021-10-09_10-20-24
  done: false
  episode_len_mean: 366.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4822
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.9014214899804858
          entropy_coeff: 0.009999999999999998
          kl: 0.006822776326513412
          policy_loss: -0.09147886106123528
          total_loss: -0.09910658962196774
          vf_explained_var: -0.770516037940979
          vf_loss: 0.0001701397358879654
    num_agent_steps_sampled: 1708000
    num_agent_steps_trained: 1708000
    num_steps_sampled: 1708000
    num_steps_trained: 1708000
  iterations_since_restore: 1708

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1708,42945.5,1708000,-0.16,0,-14,366.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1709000
  custom_metrics: {}
  date: 2021-10-09_10-20-45
  done: false
  episode_len_mean: 366.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4825
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.7644224868880378
          entropy_coeff: 0.009999999999999998
          kl: 0.006401076532940703
          policy_loss: -0.07048318768954939
          total_loss: -0.076965391356498
          vf_explained_var: -0.6620250344276428
          vf_loss: 0.000638933221246892
    num_agent_steps_sampled: 1709000
    num_agent_steps_trained: 1709000
    num_steps_sampled: 1709000
    num_steps_trained: 1709000
  iterations_since_restore: 1709
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1709,42967,1709000,-0.16,0,-14,366.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1710000
  custom_metrics: {}
  date: 2021-10-09_10-21-08
  done: false
  episode_len_mean: 368.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4828
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.8085338393847148
          entropy_coeff: 0.009999999999999998
          kl: 0.007830744867426078
          policy_loss: -0.1025129299196932
          total_loss: -0.10740951233439976
          vf_explained_var: -0.6336796879768372
          vf_loss: 0.0003153536028953062
    num_agent_steps_sampled: 1710000
    num_agent_steps_trained: 1710000
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
  iterations_since_restore: 1710

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1710,42989.4,1710000,-0.16,0,-14,368.58




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1711000
  custom_metrics: {}
  date: 2021-10-09_10-21-45
  done: false
  episode_len_mean: 369.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 2
  episodes_total: 4830
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.839530136850145
          entropy_coeff: 0.009999999999999998
          kl: 0.008851719812897348
          policy_loss: -0.08290879589815935
          total_loss: -0.0866144477079312
          vf_explained_var: -0.7075916528701782
          vf_loss: 0.00013780989263775863
    num_agent_steps_sampled: 1711000
    num_agent_steps_trained: 1711000
    num_steps_sampled: 1711000
    num_steps_trained: 1711000
  iterations_since_restore: 1711

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1711,43026.3,1711000,-0.16,0,-14,369.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1712000
  custom_metrics: {}
  date: 2021-10-09_10-22-11
  done: false
  episode_len_mean: 368.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -14.0
  episodes_this_iter: 3
  episodes_total: 4833
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.682928693294525
          entropy_coeff: 0.009999999999999998
          kl: 0.007862287728087858
          policy_loss: -0.12926737782027986
          total_loss: -0.1330192387310995
          vf_explained_var: -0.9602015018463135
          vf_loss: 0.00015216780245989664
    num_agent_steps_sampled: 1712000
    num_agent_steps_trained: 1712000
    num_steps_sampled: 1712000
    num_steps_trained: 1712000
  iterations_since_restore: 1712

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1712,43052.5,1712000,-0.16,0,-14,368.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1713000
  custom_metrics: {}
  date: 2021-10-09_10-22-31
  done: false
  episode_len_mean: 369.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 4835
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.705836534500122
          entropy_coeff: 0.009999999999999998
          kl: 0.006506448289371609
          policy_loss: -0.03944007141722573
          total_loss: -0.0457167546161347
          vf_explained_var: -0.6399503350257874
          vf_loss: 8.536501376915516e-05
    num_agent_steps_sampled: 1713000
    num_agent_steps_trained: 1713000
    num_steps_sampled: 1713000
    num_steps_trained: 1713000
  iterations_since_restore: 1713
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1713,43072.5,1713000,-0.02,0,-2,369.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1714000
  custom_metrics: {}
  date: 2021-10-09_10-22-53
  done: false
  episode_len_mean: 369.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 4838
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.9934905197885302
          entropy_coeff: 0.009999999999999998
          kl: 0.009019205408117811
          policy_loss: -0.09928917491601573
          total_loss: -0.10421195361349318
          vf_explained_var: -0.7137966752052307
          vf_loss: 0.00018494800832639965
    num_agent_steps_sampled: 1714000
    num_agent_steps_trained: 1714000
    num_steps_sampled: 1714000
    num_steps_trained: 1714000
  iterations_since_restore: 1714

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1714,43094.4,1714000,-0.02,0,-2,369.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1715000
  custom_metrics: {}
  date: 2021-10-09_10-23-16
  done: false
  episode_len_mean: 369.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 4841
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.9979568574163649
          entropy_coeff: 0.009999999999999998
          kl: 0.008209101253983577
          policy_loss: -0.05889268306394418
          total_loss: -0.06526780869397852
          vf_explained_var: -0.43042847514152527
          vf_loss: 0.0001090387793131716
    num_agent_steps_sampled: 1715000
    num_agent_steps_trained: 1715000
    num_steps_sampled: 1715000
    num_steps_trained: 1715000
  iterations_since_restore: 1715

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1715,43117.4,1715000,-0.02,0,-2,369.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1716000
  custom_metrics: {}
  date: 2021-10-09_10-23-39
  done: false
  episode_len_mean: 369.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 4843
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.9117663820584616
          entropy_coeff: 0.009999999999999998
          kl: 0.008046886443015162
          policy_loss: -0.08899428954141007
          total_loss: -0.09478334014614423
          vf_explained_var: -0.9901367425918579
          vf_loss: 9.98827144535931e-05
    num_agent_steps_sampled: 1716000
    num_agent_steps_trained: 1716000
    num_steps_sampled: 1716000
    num_steps_trained: 1716000
  iterations_since_restore: 1716


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1716,43140.5,1716000,-0.02,0,-2,369.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1717000
  custom_metrics: {}
  date: 2021-10-09_10-24-02
  done: false
  episode_len_mean: 370.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 4846
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.600530007150438
          entropy_coeff: 0.009999999999999998
          kl: 0.008369683357563436
          policy_loss: -0.08968901311357816
          total_loss: -0.09185068305167887
          vf_explained_var: -0.47430750727653503
          vf_loss: 8.423694347988607e-05
    num_agent_steps_sampled: 1717000
    num_agent_steps_trained: 1717000
    num_steps_sampled: 1717000
    num_steps_trained: 1717000
  iterations_since_restore: 1717


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1717,43163.7,1717000,-0.02,0,-2,370.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1718000
  custom_metrics: {}
  date: 2021-10-09_10-24-23
  done: false
  episode_len_mean: 372.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 4849
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.7106976628303527
          entropy_coeff: 0.009999999999999998
          kl: 0.008828564851231707
          policy_loss: -0.0982740058356689
          total_loss: -0.10070875955538618
          vf_explained_var: -0.9186425805091858
          vf_loss: 0.0001584483309771814
    num_agent_steps_sampled: 1718000
    num_agent_steps_trained: 1718000
    num_steps_sampled: 1718000
    num_steps_trained: 1718000
  iterations_since_restore: 1718


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1718,43185,1718000,-0.02,0,-2,372.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1719000
  custom_metrics: {}
  date: 2021-10-09_10-24-45
  done: false
  episode_len_mean: 372.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 4851
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.9068398144510057
          entropy_coeff: 0.009999999999999998
          kl: 0.00828023140401031
          policy_loss: -0.10885836745922764
          total_loss: -0.11419968286322223
          vf_explained_var: -0.9999297857284546
          vf_loss: 0.00011474289664571794
    num_agent_steps_sampled: 1719000
    num_agent_steps_trained: 1719000
    num_steps_sampled: 1719000
    num_steps_trained: 1719000
  iterations_since_restore: 1719

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1719,43206.2,1719000,-0.02,0,-2,372.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1720000
  custom_metrics: {}
  date: 2021-10-09_10-25-08
  done: false
  episode_len_mean: 373.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 4854
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.8979560322231717
          entropy_coeff: 0.009999999999999998
          kl: 0.008344009143006816
          policy_loss: -0.10140566523704264
          total_loss: -0.10651978842086263
          vf_explained_var: -0.2637556195259094
          vf_loss: 0.00014824946160337682
    num_agent_steps_sampled: 1720000
    num_agent_steps_trained: 1720000
    num_steps_sampled: 1720000
    num_steps_trained: 1720000
  iterations_since_restore: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1720,43229.5,1720000,-0.02,0,-2,373.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1721000
  custom_metrics: {}
  date: 2021-10-09_10-25-30
  done: false
  episode_len_mean: 373.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 4856
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.965928590297699
          entropy_coeff: 0.009999999999999998
          kl: 0.007266924041547422
          policy_loss: -0.10790397686262926
          total_loss: -0.11547325766748852
          vf_explained_var: -0.7065114378929138
          vf_loss: 0.00014349820996560286
    num_agent_steps_sampled: 1721000
    num_agent_steps_trained: 1721000
    num_steps_sampled: 1721000
    num_steps_trained: 1721000
  iterations_since_restore: 1721

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1721,43251.9,1721000,-0.02,0,-2,373.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1722000
  custom_metrics: {}
  date: 2021-10-09_10-25-54
  done: false
  episode_len_mean: 373.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 4859
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.8571395993232727
          entropy_coeff: 0.009999999999999998
          kl: 0.009783505350233984
          policy_loss: -0.06436757588138182
          total_loss: -0.06678184850348366
          vf_explained_var: -0.8973901271820068
          vf_loss: 7.346762486122315e-05
    num_agent_steps_sampled: 1722000
    num_agent_steps_trained: 1722000
    num_steps_sampled: 1722000
    num_steps_trained: 1722000
  iterations_since_restore: 1722

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1722,43275.7,1722000,-0.02,0,-2,373.99




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1723000
  custom_metrics: {}
  date: 2021-10-09_10-26-38
  done: false
  episode_len_mean: 370.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 4
  episodes_total: 4863
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.7050817953215704
          entropy_coeff: 0.009999999999999998
          kl: 0.008994490348863573
          policy_loss: -0.10978630009210771
          total_loss: -0.11198383093708092
          vf_explained_var: 0.15987582504749298
          vf_loss: 6.673979854288821e-05
    num_agent_steps_sampled: 1723000
    num_agent_steps_trained: 1723000
    num_steps_sampled: 1723000
    num_steps_trained: 1723000
  iterations_since_restore: 1723

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1723,43319.4,1723000,-0.02,0,-2,370.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1724000
  custom_metrics: {}
  date: 2021-10-09_10-27-01
  done: false
  episode_len_mean: 370.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 4865
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.8857403609487746
          entropy_coeff: 0.009999999999999998
          kl: 0.009075894914098765
          policy_loss: -0.021969023533165453
          total_loss: -0.025847041689687305
          vf_explained_var: -0.9865751266479492
          vf_loss: 5.900971785498162e-05
    num_agent_steps_sampled: 1724000
    num_agent_steps_trained: 1724000
    num_steps_sampled: 1724000
    num_steps_trained: 1724000
  iterations_since_restore: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1724,43342.8,1724000,-0.02,0,-2,370.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1725000
  custom_metrics: {}
  date: 2021-10-09_10-27-25
  done: false
  episode_len_mean: 370.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 4868
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.800793668958876
          entropy_coeff: 0.009999999999999998
          kl: 0.006341121196018642
          policy_loss: -0.0905892496307691
          total_loss: -0.09809658614297707
          vf_explained_var: -0.902484655380249
          vf_loss: 7.607371298945509e-05
    num_agent_steps_sampled: 1725000
    num_agent_steps_trained: 1725000
    num_steps_sampled: 1725000
    num_steps_trained: 1725000
  iterations_since_restore: 1725
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1725,43366.1,1725000,-0.02,0,-2,370.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1726000
  custom_metrics: {}
  date: 2021-10-09_10-27-46
  done: false
  episode_len_mean: 371.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 4871
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.9388682232962715
          entropy_coeff: 0.009999999999999998
          kl: 0.008160968187694647
          policy_loss: -0.08494081325415108
          total_loss: -0.09085063466595279
          vf_explained_var: -0.6020946502685547
          vf_loss: 6.25843947192253e-05
    num_agent_steps_sampled: 1726000
    num_agent_steps_trained: 1726000
    num_steps_sampled: 1726000
    num_steps_trained: 1726000
  iterations_since_restore: 1726


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1726,43387.1,1726000,-0.02,0,-2,371.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1727000
  custom_metrics: {}
  date: 2021-10-09_10-28-10
  done: false
  episode_len_mean: 370.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4873
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.8868496033880446
          entropy_coeff: 0.009999999999999998
          kl: 0.005698347631109455
          policy_loss: -0.05580181017931965
          total_loss: -0.06527264342746801
          vf_explained_var: -0.9634843468666077
          vf_loss: 2.9826404867587066e-05
    num_agent_steps_sampled: 1727000
    num_agent_steps_trained: 1727000
    num_steps_sampled: 1727000
    num_steps_trained: 1727000
  iterations_since_restore: 1727
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1727,43411.6,1727000,0,0,0,370.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1728000
  custom_metrics: {}
  date: 2021-10-09_10-28-34
  done: false
  episode_len_mean: 369.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4876
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.7944084472126431
          entropy_coeff: 0.009999999999999998
          kl: 0.0064673383735127255
          policy_loss: -0.04819682724773884
          total_loss: -0.055410359882646136
          vf_explained_var: -0.8291407823562622
          vf_loss: 9.852932861475791e-05
    num_agent_steps_sampled: 1728000
    num_agent_steps_trained: 1728000
    num_steps_sampled: 1728000
    num_steps_trained: 1728000
  iterations_since_restore: 1728


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1728,43435.7,1728000,0,0,0,369.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1729000
  custom_metrics: {}
  date: 2021-10-09_10-28-57
  done: false
  episode_len_mean: 367.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4879
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.9081651065084668
          entropy_coeff: 0.009999999999999998
          kl: 0.008774723210315067
          policy_loss: -0.12364663727995422
          total_loss: -0.1282337674043245
          vf_explained_var: -0.5040804743766785
          vf_loss: 6.926002044767503e-05
    num_agent_steps_sampled: 1729000
    num_agent_steps_trained: 1729000
    num_steps_sampled: 1729000
    num_steps_trained: 1729000
  iterations_since_restore: 1729
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1729,43458.5,1729000,0,0,0,367.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1730000
  custom_metrics: {}
  date: 2021-10-09_10-29-22
  done: false
  episode_len_mean: 366.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4882
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.643956269143382
          cur_lr: 5.000000000000001e-05
          entropy: 1.7877088096406726
          entropy_coeff: 0.009999999999999998
          kl: 0.0036321930031220774
          policy_loss: -0.038344718515872955
          total_loss: -0.04949204437434673
          vf_explained_var: -0.4148936867713928
          vf_loss: 0.0007585936957588678
    num_agent_steps_sampled: 1730000
    num_agent_steps_trained: 1730000
    num_steps_sampled: 1730000
    num_steps_trained: 1730000
  iterations_since_restore: 1730


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1730,43483,1730000,0,0,0,366.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1731000
  custom_metrics: {}
  date: 2021-10-09_10-29-45
  done: false
  episode_len_mean: 367.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4885
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.8944786296950447
          entropy_coeff: 0.009999999999999998
          kl: 0.013331959938397454
          policy_loss: -0.08457376956939697
          total_loss: -0.09245802618356215
          vf_explained_var: -0.39940595626831055
          vf_loss: 0.00010194775044914826
    num_agent_steps_sampled: 1731000
    num_agent_steps_trained: 1731000
    num_steps_sampled: 1731000
    num_steps_trained: 1731000
  iterations_since_restore: 1731


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1731,43506.1,1731000,0,0,0,367.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1732000
  custom_metrics: {}
  date: 2021-10-09_10-30-07
  done: false
  episode_len_mean: 368.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4887
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.9212204588784112
          entropy_coeff: 0.009999999999999998
          kl: 0.012432442495918907
          policy_loss: -0.11161402890251743
          total_loss: -0.12048438456323411
          vf_explained_var: -0.6723105907440186
          vf_loss: 0.00012265166547472796
    num_agent_steps_sampled: 1732000
    num_agent_steps_trained: 1732000
    num_steps_sampled: 1732000
    num_steps_trained: 1732000
  iterations_since_restore: 1732
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1732,43528,1732000,0,0,0,368.42




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1733000
  custom_metrics: {}
  date: 2021-10-09_10-30-46
  done: false
  episode_len_mean: 368.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4890
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.8441858662499322
          entropy_coeff: 0.009999999999999998
          kl: 0.012453244333015019
          policy_loss: -0.04600687126318614
          total_loss: -0.05414604728834497
          vf_explained_var: -0.34324586391448975
          vf_loss: 6.638835915914064e-05
    num_agent_steps_sampled: 1733000
    num_agent_steps_trained: 1733000
    num_steps_sampled: 1733000
    num_steps_trained: 1733000
  iterations_since_restore: 1733
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1733,43567.4,1733000,0,0,0,368.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1734000
  custom_metrics: {}
  date: 2021-10-09_10-31-11
  done: false
  episode_len_mean: 368.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4893
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.8757813493410747
          entropy_coeff: 0.009999999999999998
          kl: 0.0181725173855922
          policy_loss: -0.08065922462070982
          total_loss: -0.08441973445523117
          vf_explained_var: -0.1591767519712448
          vf_loss: 5.989162451037878e-05
    num_agent_steps_sampled: 1734000
    num_agent_steps_trained: 1734000
    num_steps_sampled: 1734000
    num_steps_trained: 1734000
  iterations_since_restore: 1734
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1734,43592.5,1734000,0,0,0,368.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1735000
  custom_metrics: {}
  date: 2021-10-09_10-31-35
  done: false
  episode_len_mean: 368.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4896
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.8368045210838317
          entropy_coeff: 0.009999999999999998
          kl: 0.014191906176431996
          policy_loss: -0.09478153222137027
          total_loss: -0.10141394825445281
          vf_explained_var: -0.43380042910575867
          vf_loss: 7.018744688846507e-05
    num_agent_steps_sampled: 1735000
    num_agent_steps_trained: 1735000
    num_steps_sampled: 1735000
    num_steps_trained: 1735000
  iterations_since_restore: 1735
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1735,43616.4,1735000,0,0,0,368.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1736000
  custom_metrics: {}
  date: 2021-10-09_10-31-59
  done: false
  episode_len_mean: 367.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4899
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.9016470551490783
          entropy_coeff: 0.009999999999999998
          kl: 0.010258915582318758
          policy_loss: -0.046811552407840885
          total_loss: -0.05735412794682715
          vf_explained_var: -0.48865246772766113
          vf_loss: 4.128871786532626e-05
    num_agent_steps_sampled: 1736000
    num_agent_steps_trained: 1736000
    num_steps_sampled: 1736000
    num_steps_trained: 1736000
  iterations_since_restore: 1736


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1736,43639.9,1736000,0,0,0,367.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1737000
  custom_metrics: {}
  date: 2021-10-09_10-32-22
  done: false
  episode_len_mean: 367.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4901
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.802313498655955
          entropy_coeff: 0.009999999999999998
          kl: 0.011269120574507498
          policy_loss: -0.05990392772687806
          total_loss: -0.06857573878433969
          vf_explained_var: -0.7849201560020447
          vf_loss: 8.835171754779165e-05
    num_agent_steps_sampled: 1737000
    num_agent_steps_trained: 1737000
    num_steps_sampled: 1737000
    num_steps_trained: 1737000
  iterations_since_restore: 1737
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1737,43662.9,1737000,0,0,0,367.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1738000
  custom_metrics: {}
  date: 2021-10-09_10-32-47
  done: false
  episode_len_mean: 366.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4904
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.9374929030736288
          entropy_coeff: 0.009999999999999998
          kl: 0.010938174945298368
          policy_loss: -0.03688112056503693
          total_loss: -0.047198077311946286
          vf_explained_var: -0.705863893032074
          vf_loss: 6.703225313281615e-05
    num_agent_steps_sampled: 1738000
    num_agent_steps_trained: 1738000
    num_steps_sampled: 1738000
    num_steps_trained: 1738000
  iterations_since_restore: 1738
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1738,43688.2,1738000,0,0,0,366.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1739000
  custom_metrics: {}
  date: 2021-10-09_10-33-14
  done: false
  episode_len_mean: 364.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4907
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.5621615476078456
          entropy_coeff: 0.009999999999999998
          kl: 0.013666598370240063
          policy_loss: -0.05670381519529555
          total_loss: -0.061054942508538566
          vf_explained_var: 0.16185146570205688
          vf_loss: 3.684250443459152e-05
    num_agent_steps_sampled: 1739000
    num_agent_steps_trained: 1739000
    num_steps_sampled: 1739000
    num_steps_trained: 1739000
  iterations_since_restore: 1739
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1739,43714.8,1739000,0,0,0,364.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1740000
  custom_metrics: {}
  date: 2021-10-09_10-33-35
  done: false
  episode_len_mean: 364.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4910
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.859014168050554
          entropy_coeff: 0.009999999999999998
          kl: 0.014058891575075765
          policy_loss: -0.11349812973704602
          total_loss: -0.12049378127687507
          vf_explained_var: -0.6920422315597534
          vf_loss: 3.839033908863914e-05
    num_agent_steps_sampled: 1740000
    num_agent_steps_trained: 1740000
    num_steps_sampled: 1740000
    num_steps_trained: 1740000
  iterations_since_restore: 1740
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1740,43736.5,1740000,0,0,0,364.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1741000
  custom_metrics: {}
  date: 2021-10-09_10-34-01
  done: false
  episode_len_mean: 364.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4913
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.7895885109901428
          entropy_coeff: 0.009999999999999998
          kl: 0.013696130067127625
          policy_loss: -0.01583315682493978
          total_loss: -0.022364631584948964
          vf_explained_var: -0.6458331942558289
          vf_loss: 0.00010648765922572541
    num_agent_steps_sampled: 1741000
    num_agent_steps_trained: 1741000
    num_steps_sampled: 1741000
    num_steps_trained: 1741000
  iterations_since_restore: 1741


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1741,43761.9,1741000,0,0,0,364.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1742000
  custom_metrics: {}
  date: 2021-10-09_10-34-25
  done: false
  episode_len_mean: 365.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4916
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.8380086951785617
          entropy_coeff: 0.009999999999999998
          kl: 0.011302273561512009
          policy_loss: -0.08871113980809847
          total_loss: -0.09775374564859603
          vf_explained_var: -0.42853814363479614
          vf_loss: 4.725925946331699e-05
    num_agent_steps_sampled: 1742000
    num_agent_steps_trained: 1742000
    num_steps_sampled: 1742000
    num_steps_trained: 1742000
  iterations_since_restore: 1742
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1742,43785.6,1742000,0,0,0,365.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1743000
  custom_metrics: {}
  date: 2021-10-09_10-34-49
  done: false
  episode_len_mean: 364.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4918
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.6179724229706658
          entropy_coeff: 0.009999999999999998
          kl: 0.012486244834550833
          policy_loss: -0.02815691167488694
          total_loss: -0.034044702413181464
          vf_explained_var: -0.03069235198199749
          vf_loss: 2.8510319478098405e-05
    num_agent_steps_sampled: 1743000
    num_agent_steps_trained: 1743000
    num_steps_sampled: 1743000
    num_steps_trained: 1743000
  iterations_since_restore: 1743

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1743,43809.6,1743000,0,0,0,364.12




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1744000
  custom_metrics: {}
  date: 2021-10-09_10-35-29
  done: false
  episode_len_mean: 363.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4921
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.7314524359173245
          entropy_coeff: 0.009999999999999998
          kl: 0.01446775805154081
          policy_loss: -0.037577901490860516
          total_loss: -0.042973530872000586
          vf_explained_var: -0.5091901421546936
          vf_loss: 2.6716034628609326e-05
    num_agent_steps_sampled: 1744000
    num_agent_steps_trained: 1744000
    num_steps_sampled: 1744000
    num_steps_trained: 1744000
  iterations_since_restore: 1744


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1744,43849.8,1744000,0,0,0,363.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1745000
  custom_metrics: {}
  date: 2021-10-09_10-35-50
  done: false
  episode_len_mean: 365.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4924
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.4107615262269975
          entropy_coeff: 0.009999999999999998
          kl: 0.012717002301733555
          policy_loss: -0.14088921517961556
          total_loss: -0.1425299205713802
          vf_explained_var: -0.2497119903564453
          vf_loss: 0.002013809654340548
    num_agent_steps_sampled: 1745000
    num_agent_steps_trained: 1745000
    num_steps_sampled: 1745000
    num_steps_trained: 1745000
  iterations_since_restore: 1745
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1745,43870.7,1745000,0,0,0,365.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1746000
  custom_metrics: {}
  date: 2021-10-09_10-36-15
  done: false
  episode_len_mean: 362.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4927
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.8185049215952556
          entropy_coeff: 0.009999999999999998
          kl: 0.012100077636246058
          policy_loss: -0.05361442234781053
          total_loss: -0.061803231409026514
          vf_explained_var: -0.4538419544696808
          vf_loss: 5.023886464591164e-05
    num_agent_steps_sampled: 1746000
    num_agent_steps_trained: 1746000
    num_steps_sampled: 1746000
    num_steps_trained: 1746000
  iterations_since_restore: 1746
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1746,43895.6,1746000,0,0,0,362.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1747000
  custom_metrics: {}
  date: 2021-10-09_10-36-38
  done: false
  episode_len_mean: 361.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4930
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.8495801607767741
          entropy_coeff: 0.009999999999999998
          kl: 0.01239397428679041
          policy_loss: -0.05453102211985323
          total_loss: -0.0628093886292643
          vf_explained_var: -0.31994906067848206
          vf_loss: 2.985865440779081e-05
    num_agent_steps_sampled: 1747000
    num_agent_steps_trained: 1747000
    num_steps_sampled: 1747000
    num_steps_trained: 1747000
  iterations_since_restore: 1747
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1747,43918.8,1747000,0,0,0,361.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1748000
  custom_metrics: {}
  date: 2021-10-09_10-37-02
  done: false
  episode_len_mean: 361.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4933
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.663238200876448
          entropy_coeff: 0.009999999999999998
          kl: 0.015239348210988917
          policy_loss: -0.06274288105260994
          total_loss: -0.06678491543150611
          vf_explained_var: -0.43244561553001404
          vf_loss: 6.393982334379365e-05
    num_agent_steps_sampled: 1748000
    num_agent_steps_trained: 1748000
    num_steps_sampled: 1748000
    num_steps_trained: 1748000
  iterations_since_restore: 1748
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1748,43943.3,1748000,0,0,0,361.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1749000
  custom_metrics: {}
  date: 2021-10-09_10-37-28
  done: false
  episode_len_mean: 358.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4936
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.715260910987854
          entropy_coeff: 0.009999999999999998
          kl: 0.012708642678021992
          policy_loss: -0.037808839976787564
          total_loss: -0.044484911155369546
          vf_explained_var: -0.7544748783111572
          vf_loss: 3.031216668508326e-05
    num_agent_steps_sampled: 1749000
    num_agent_steps_trained: 1749000
    num_steps_sampled: 1749000
    num_steps_trained: 1749000
  iterations_since_restore: 1749
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1749,43968.5,1749000,0,0,0,358.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1750000
  custom_metrics: {}
  date: 2021-10-09_10-37-52
  done: false
  episode_len_mean: 357.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4939
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.9345145967271593
          entropy_coeff: 0.009999999999999998
          kl: 0.013317968508762669
          policy_loss: -0.03193702436983585
          total_loss: -0.040303176393111546
          vf_explained_var: -0.21849726140499115
          vf_loss: 3.191383506823008e-05
    num_agent_steps_sampled: 1750000
    num_agent_steps_trained: 1750000
    num_steps_sampled: 1750000
    num_steps_trained: 1750000
  iterations_since_restore: 1750


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1750,43993.3,1750000,0,0,0,357.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1751000
  custom_metrics: {}
  date: 2021-10-09_10-38-15
  done: false
  episode_len_mean: 356.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4941
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.7424384872118632
          entropy_coeff: 0.009999999999999998
          kl: 0.010484132927137206
          policy_loss: -0.09371763544364108
          total_loss: -0.10247878964162535
          vf_explained_var: -1.0
          vf_loss: 4.55027030612756e-05
    num_agent_steps_sampled: 1751000
    num_agent_steps_trained: 1751000
    num_steps_sampled: 1751000
    num_steps_trained: 1751000
  iterations_since_restore: 1751
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1751,44015.5,1751000,0,0,0,356.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1752000
  custom_metrics: {}
  date: 2021-10-09_10-38-39
  done: false
  episode_len_mean: 356.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4944
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.7936133649614123
          entropy_coeff: 0.009999999999999998
          kl: 0.013594163956537682
          policy_loss: -0.06971069183200598
          total_loss: -0.07642810872445503
          vf_explained_var: -0.6846818923950195
          vf_loss: 4.460822967505212e-05
    num_agent_steps_sampled: 1752000
    num_agent_steps_trained: 1752000
    num_steps_sampled: 1752000
    num_steps_trained: 1752000
  iterations_since_restore: 1752
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1752,44040.1,1752000,0,0,0,356.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1753000
  custom_metrics: {}
  date: 2021-10-09_10-39-02
  done: false
  episode_len_mean: 356.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4947
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.7940354731347825
          entropy_coeff: 0.009999999999999998
          kl: 0.01131763087134598
          policy_loss: -0.08247676081955432
          total_loss: -0.09103864932225811
          vf_explained_var: -0.9233283996582031
          vf_loss: 7.561793089634092e-05
    num_agent_steps_sampled: 1753000
    num_agent_steps_trained: 1753000
    num_steps_sampled: 1753000
    num_steps_trained: 1753000
  iterations_since_restore: 1753
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1753,44062.7,1753000,0,0,0,356.44




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1754000
  custom_metrics: {}
  date: 2021-10-09_10-39-42
  done: false
  episode_len_mean: 354.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4950
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.6793879283799065
          entropy_coeff: 0.009999999999999998
          kl: 0.011639608980805535
          policy_loss: -0.05496537507408195
          total_loss: -0.062151094401876134
          vf_explained_var: -0.372831255197525
          vf_loss: 4.065791534028701e-05
    num_agent_steps_sampled: 1754000
    num_agent_steps_trained: 1754000
    num_steps_sampled: 1754000
    num_steps_trained: 1754000
  iterations_since_restore: 1754
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1754,44103.3,1754000,0,0,0,354.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1755000
  custom_metrics: {}
  date: 2021-10-09_10-40-09
  done: false
  episode_len_mean: 354.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4953
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.8545519590377808
          entropy_coeff: 0.009999999999999998
          kl: 0.021675194496031006
          policy_loss: -0.0806971537363198
          total_loss: -0.08139942760268846
          vf_explained_var: 0.21659061312675476
          vf_loss: 2.6710312047523783e-05
    num_agent_steps_sampled: 1755000
    num_agent_steps_trained: 1755000
    num_steps_sampled: 1755000
    num_steps_trained: 1755000
  iterations_since_restore: 1755
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1755,44129.4,1755000,0,0,0,354.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1756000
  custom_metrics: {}
  date: 2021-10-09_10-40-35
  done: false
  episode_len_mean: 353.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4956
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2329672018575368
          cur_lr: 5.000000000000001e-05
          entropy: 1.8418430752224393
          entropy_coeff: 0.009999999999999998
          kl: 0.011320812918210708
          policy_loss: -0.02683724673050973
          total_loss: -0.031276196189638644
          vf_explained_var: -0.6988562345504761
          vf_loss: 2.1289414194648593e-05
    num_agent_steps_sampled: 1756000
    num_agent_steps_trained: 1756000
    num_steps_sampled: 1756000
    num_steps_trained: 1756000
  iterations_since_restore: 1756

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1756,44156.1,1756000,0,0,0,353.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1757000
  custom_metrics: {}
  date: 2021-10-09_10-41-01
  done: false
  episode_len_mean: 352.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4959
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2329672018575368
          cur_lr: 5.000000000000001e-05
          entropy: 1.6988221764564515
          entropy_coeff: 0.009999999999999998
          kl: 0.009608689259712349
          policy_loss: -0.10698739029467105
          total_loss: -0.11208831357459227
          vf_explained_var: -0.6082863211631775
          vf_loss: 4.00952911958383e-05
    num_agent_steps_sampled: 1757000
    num_agent_steps_trained: 1757000
    num_steps_sampled: 1757000
    num_steps_trained: 1757000
  iterations_since_restore: 1757
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1757,44181.7,1757000,0,0,0,352.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1758000
  custom_metrics: {}
  date: 2021-10-09_10-41-25
  done: false
  episode_len_mean: 354.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4961
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2329672018575368
          cur_lr: 5.000000000000001e-05
          entropy: 1.7545816447999742
          entropy_coeff: 0.009999999999999998
          kl: 0.009251862247289506
          policy_loss: -0.07190843224525452
          total_loss: -0.07802713186376625
          vf_explained_var: -0.7290458679199219
          vf_loss: 1.9872195985549803e-05
    num_agent_steps_sampled: 1758000
    num_agent_steps_trained: 1758000
    num_steps_sampled: 1758000
    num_steps_trained: 1758000
  iterations_since_restore: 1758


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1758,44206,1758000,0,0,0,354.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1759000
  custom_metrics: {}
  date: 2021-10-09_10-41-52
  done: false
  episode_len_mean: 354.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4964
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2329672018575368
          cur_lr: 5.000000000000001e-05
          entropy: 1.887089040544298
          entropy_coeff: 0.009999999999999998
          kl: 0.010168079739591896
          policy_loss: -0.01756197340372536
          total_loss: -0.02388382690648238
          vf_explained_var: -0.4013524055480957
          vf_loss: 1.2128381963015677e-05
    num_agent_steps_sampled: 1759000
    num_agent_steps_trained: 1759000
    num_steps_sampled: 1759000
    num_steps_trained: 1759000
  iterations_since_restore: 1759
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1759,44232.8,1759000,0,0,0,354.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1760000
  custom_metrics: {}
  date: 2021-10-09_10-42-20
  done: false
  episode_len_mean: 353.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4967
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2329672018575368
          cur_lr: 5.000000000000001e-05
          entropy: 1.7788375284936693
          entropy_coeff: 0.009999999999999998
          kl: 0.005005476405893777
          policy_loss: -0.029241003882553844
          total_loss: -0.040825412712163396
          vf_explained_var: -0.5633683204650879
          vf_loss: 3.237782428995059e-05
    num_agent_steps_sampled: 1760000
    num_agent_steps_trained: 1760000
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
  iterations_since_restore: 1760


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1760,44260.3,1760000,0,0,0,353.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1761000
  custom_metrics: {}
  date: 2021-10-09_10-42-46
  done: false
  episode_len_mean: 352.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4970
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2329672018575368
          cur_lr: 5.000000000000001e-05
          entropy: 1.9552822086546156
          entropy_coeff: 0.009999999999999998
          kl: 0.0021947517484850266
          policy_loss: -0.029813189131932125
          total_loss: -0.046483682654798034
          vf_explained_var: -0.8142867088317871
          vf_loss: 0.00017627051994269197
    num_agent_steps_sampled: 1761000
    num_agent_steps_trained: 1761000
    num_steps_sampled: 1761000
    num_steps_trained: 1761000
  iterations_since_restore: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1761,44286.7,1761000,0,0,0,352.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1762000
  custom_metrics: {}
  date: 2021-10-09_10-43-16
  done: false
  episode_len_mean: 350.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4973
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.669602542453342
          entropy_coeff: 0.009999999999999998
          kl: 0.01880583030288098
          policy_loss: -0.04052934433437056
          total_loss: -0.045601763038171664
          vf_explained_var: -0.5161117315292358
          vf_loss: 3.0121358981381894e-05
    num_agent_steps_sampled: 1762000
    num_agent_steps_trained: 1762000
    num_steps_sampled: 1762000
    num_steps_trained: 1762000
  iterations_since_restore: 1762
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1762,44316.7,1762000,0,0,0,350.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1763000
  custom_metrics: {}
  date: 2021-10-09_10-43-43
  done: false
  episode_len_mean: 349.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4976
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.8707820508215163
          entropy_coeff: 0.009999999999999998
          kl: 0.01425894797594363
          policy_loss: -0.1129344855952594
          total_loss: -0.12280404443542163
          vf_explained_var: 0.29295048117637634
          vf_loss: 4.7855834499286074e-05
    num_agent_steps_sampled: 1763000
    num_agent_steps_trained: 1763000
    num_steps_sampled: 1763000
    num_steps_trained: 1763000
  iterations_since_restore: 1763
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1763,44343.4,1763000,0,0,0,349.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1764000
  custom_metrics: {}
  date: 2021-10-09_10-44-08
  done: false
  episode_len_mean: 349.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4979
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.8316020475493537
          entropy_coeff: 0.009999999999999998
          kl: 0.01709420906012519
          policy_loss: -0.10249239496058887
          total_loss: -0.11023514875107342
          vf_explained_var: -0.4668847322463989
          vf_loss: 3.496822183579853e-05
    num_agent_steps_sampled: 1764000
    num_agent_steps_trained: 1764000
    num_steps_sampled: 1764000
    num_steps_trained: 1764000
  iterations_since_restore: 1764
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1764,44368.8,1764000,0,0,0,349.52




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1765000
  custom_metrics: {}
  date: 2021-10-09_10-44-51
  done: false
  episode_len_mean: 349.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4982
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.6852642165289984
          entropy_coeff: 0.009999999999999998
          kl: 0.013246392904016899
          policy_loss: -0.04652109050916301
          total_loss: -0.05517886156837146
          vf_explained_var: -0.45780807733535767
          vf_loss: 2.86860223746367e-05
    num_agent_steps_sampled: 1765000
    num_agent_steps_trained: 1765000
    num_steps_sampled: 1765000
    num_steps_trained: 1765000
  iterations_since_restore: 1765
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1765,44411.9,1765000,0,0,0,349.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1766000
  custom_metrics: {}
  date: 2021-10-09_10-45-15
  done: false
  episode_len_mean: 347.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4985
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.7138158679008484
          entropy_coeff: 0.009999999999999998
          kl: 0.015515628949794092
          policy_loss: -0.06077467654314306
          total_loss: -0.06832250085555845
          vf_explained_var: -0.531370222568512
          vf_loss: 2.5202244675407807e-05
    num_agent_steps_sampled: 1766000
    num_agent_steps_trained: 1766000
    num_steps_sampled: 1766000
    num_steps_trained: 1766000
  iterations_since_restore: 1766
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1766,44435.7,1766000,0,0,0,347.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1767000
  custom_metrics: {}
  date: 2021-10-09_10-45-40
  done: false
  episode_len_mean: 345.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4988
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.6070028848118252
          entropy_coeff: 0.009999999999999998
          kl: 0.010906001017552965
          policy_loss: -0.0200296723180347
          total_loss: -0.02935039471420977
          vf_explained_var: -0.29057273268699646
          vf_loss: 2.5933922041885023e-05
    num_agent_steps_sampled: 1767000
    num_agent_steps_trained: 1767000
    num_steps_sampled: 1767000
    num_steps_trained: 1767000
  iterations_since_restore: 1767


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1767,44460.7,1767000,0,0,0,345.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1768000
  custom_metrics: {}
  date: 2021-10-09_10-46-02
  done: false
  episode_len_mean: 347.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4991
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.7971437083350288
          entropy_coeff: 0.009999999999999998
          kl: 0.018510278796534784
          policy_loss: -0.1385668377081553
          total_loss: -0.1451052008403672
          vf_explained_var: -0.025064479559659958
          vf_loss: 2.179096735643624e-05
    num_agent_steps_sampled: 1768000
    num_agent_steps_trained: 1768000
    num_steps_sampled: 1768000
    num_steps_trained: 1768000
  iterations_since_restore: 1768
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1768,44482.6,1768000,0,0,0,347.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1769000
  custom_metrics: {}
  date: 2021-10-09_10-46-28
  done: false
  episode_len_mean: 345.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4994
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.6351595958073935
          entropy_coeff: 0.009999999999999998
          kl: 0.014614473407291781
          policy_loss: -0.10437180201212565
          total_loss: -0.1117005416088634
          vf_explained_var: -0.1002519428730011
          vf_loss: 1.3272462415948717e-05
    num_agent_steps_sampled: 1769000
    num_agent_steps_trained: 1769000
    num_steps_sampled: 1769000
    num_steps_trained: 1769000
  iterations_since_restore: 1769
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1769,44508.6,1769000,0,0,0,345.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1770000
  custom_metrics: {}
  date: 2021-10-09_10-46-52
  done: false
  episode_len_mean: 345.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 4997
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.6117307000690035
          entropy_coeff: 0.009999999999999998
          kl: 0.014743729644790843
          policy_loss: -0.07322364186661111
          total_loss: -0.08022599269946416
          vf_explained_var: -0.7424439191818237
          vf_loss: 2.5685687099515538e-05
    num_agent_steps_sampled: 1770000
    num_agent_steps_trained: 1770000
    num_steps_sampled: 1770000
    num_steps_trained: 1770000
  iterations_since_restore: 1770


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1770,44532.1,1770000,0,0,0,345.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1771000
  custom_metrics: {}
  date: 2021-10-09_10-47-18
  done: false
  episode_len_mean: 342.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 5001
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6164836009287684
          cur_lr: 5.000000000000001e-05
          entropy: 1.5696664267116123
          entropy_coeff: 0.009999999999999998
          kl: 0.024813800541518462
          policy_loss: -0.16147970408201218
          total_loss: -0.1618431697289149
          vf_explained_var: -0.4230353832244873
          vf_loss: 3.589826615603265e-05
    num_agent_steps_sampled: 1771000
    num_agent_steps_trained: 1771000
    num_steps_sampled: 1771000
    num_steps_trained: 1771000
  iterations_since_restore: 1771
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1771,44558.9,1771000,0,0,0,342.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1772000
  custom_metrics: {}
  date: 2021-10-09_10-47-41
  done: false
  episode_len_mean: 342.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5003
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.777698070473141
          entropy_coeff: 0.009999999999999998
          kl: 0.014411851649303002
          policy_loss: -0.07803422847969664
          total_loss: -0.08246235640512573
          vf_explained_var: -0.8900668025016785
          vf_loss: 2.1847512289241423e-05
    num_agent_steps_sampled: 1772000
    num_agent_steps_trained: 1772000
    num_steps_sampled: 1772000
    num_steps_trained: 1772000
  iterations_since_restore: 1772
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1772,44581.6,1772000,0,0,0,342.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1773000
  custom_metrics: {}
  date: 2021-10-09_10-48-06
  done: false
  episode_len_mean: 343.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5006
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.670867951711019
          entropy_coeff: 0.009999999999999998
          kl: 0.010914469315643613
          policy_loss: -0.07011388282602032
          total_loss: -0.07671815254208114
          vf_explained_var: -0.27100083231925964
          vf_loss: 1.1522479427286373e-05
    num_agent_steps_sampled: 1773000
    num_agent_steps_trained: 1773000
    num_steps_sampled: 1773000
    num_steps_trained: 1773000
  iterations_since_restore: 1773
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1773,44606.5,1773000,0,0,0,343.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1774000
  custom_metrics: {}
  date: 2021-10-09_10-48-28
  done: false
  episode_len_mean: 343.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5009
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7079900172021654
          entropy_coeff: 0.009999999999999998
          kl: 0.011131462338480544
          policy_loss: -0.14121669853727023
          total_loss: -0.14799024479256737
          vf_explained_var: -0.382429838180542
          vf_loss: 1.280786100323894e-05
    num_agent_steps_sampled: 1774000
    num_agent_steps_trained: 1774000
    num_steps_sampled: 1774000
    num_steps_trained: 1774000
  iterations_since_restore: 1774
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1774,44628.8,1774000,0,0,0,343.95




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1775000
  custom_metrics: {}
  date: 2021-10-09_10-49-09
  done: false
  episode_len_mean: 343.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5012
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.6628487269083658
          entropy_coeff: 0.009999999999999998
          kl: 0.006327234179963861
          policy_loss: 0.015531619058714972
          total_loss: 0.050394582086139256
          vf_explained_var: -0.5246811509132385
          vf_loss: 0.04564049371555383
    num_agent_steps_sampled: 1775000
    num_agent_steps_trained: 1775000
    num_steps_sampled: 1775000
    num_steps_trained: 1775000
  iterations_since_restore: 1775


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1775,44669.7,1775000,-0.03,0,-3,343.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1776000
  custom_metrics: {}
  date: 2021-10-09_10-49-32
  done: false
  episode_len_mean: 342.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5015
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.759994191593594
          entropy_coeff: 0.009999999999999998
          kl: 0.011490170140572391
          policy_loss: -0.03870139845336477
          total_loss: -0.0403035691111452
          vf_explained_var: -0.10838975757360458
          vf_loss: 0.005372518766671419
    num_agent_steps_sampled: 1776000
    num_agent_steps_trained: 1776000
    num_steps_sampled: 1776000
    num_steps_trained: 1776000
  iterations_since_restore: 1776


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1776,44691.9,1776000,-0.03,0,-3,342.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1777000
  custom_metrics: {}
  date: 2021-10-09_10-49-56
  done: false
  episode_len_mean: 343.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5018
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7698844220903185
          entropy_coeff: 0.009999999999999998
          kl: 0.011547267088189983
          policy_loss: -0.02581420065835118
          total_loss: -0.031061921361833812
          vf_explained_var: 0.05530934780836105
          vf_loss: 0.0017730701895844606
    num_agent_steps_sampled: 1777000
    num_agent_steps_trained: 1777000
    num_steps_sampled: 1777000
    num_steps_trained: 1777000
  iterations_since_restore: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1777,44716.6,1777000,-0.03,0,-3,343.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1778000
  custom_metrics: {}
  date: 2021-10-09_10-50-20
  done: false
  episode_len_mean: 342.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5021
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8084920936160618
          entropy_coeff: 0.009999999999999998
          kl: 0.016488005972682275
          policy_loss: -0.14499928082029026
          total_loss: -0.14649069561726516
          vf_explained_var: -0.4848109781742096
          vf_loss: 0.0013466266443073335
    num_agent_steps_sampled: 1778000
    num_agent_steps_trained: 1778000
    num_steps_sampled: 1778000
    num_steps_trained: 1778000
  iterations_since_restore: 177

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1778,44740.7,1778000,-0.03,0,-3,342.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1779000
  custom_metrics: {}
  date: 2021-10-09_10-50-43
  done: false
  episode_len_mean: 339.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 5023
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7046475913789538
          entropy_coeff: 0.009999999999999998
          kl: 0.010455372357539409
          policy_loss: -0.13319380068116718
          total_loss: -0.13969996968905132
          vf_explained_var: -0.5230599045753479
          vf_loss: 0.0008719569469879692
    num_agent_steps_sampled: 1779000
    num_agent_steps_trained: 1779000
    num_steps_sampled: 1779000
    num_steps_trained: 1779000
  iterations_since_restore: 177

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1779,44763.1,1779000,-0.03,0,-3,339.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1780000
  custom_metrics: {}
  date: 2021-10-09_10-51-11
  done: false
  episode_len_mean: 339.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 4
  episodes_total: 5027
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7345987333191766
          entropy_coeff: 0.009999999999999998
          kl: 0.015181653620057469
          policy_loss: -0.03626560415658686
          total_loss: -0.03918462788893117
          vf_explained_var: -0.2524998188018799
          vf_loss: 0.00038810211376080083
    num_agent_steps_sampled: 1780000
    num_agent_steps_trained: 1780000
    num_steps_sampled: 1780000
    num_steps_trained: 1780000
  iterations_since_restore: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1780,44790.8,1780000,-0.03,0,-3,339.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1781000
  custom_metrics: {}
  date: 2021-10-09_10-51-35
  done: false
  episode_len_mean: 339.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5030
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8098359809981452
          entropy_coeff: 0.009999999999999998
          kl: 0.013512044485070696
          policy_loss: -0.05621395506378677
          total_loss: -0.061319175879988405
          vf_explained_var: -0.853944718837738
          vf_loss: 0.0004982107590573529
    num_agent_steps_sampled: 1781000
    num_agent_steps_trained: 1781000
    num_steps_sampled: 1781000
    num_steps_trained: 1781000
  iterations_since_restore: 178

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1781,44815.2,1781000,-0.03,0,-3,339.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1782000
  custom_metrics: {}
  date: 2021-10-09_10-52-00
  done: false
  episode_len_mean: 339.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5033
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7626503348350524
          entropy_coeff: 0.009999999999999998
          kl: 0.012891236247870768
          policy_loss: -0.05628942876226372
          total_loss: -0.06167315625482135
          vf_explained_var: -0.4939805865287781
          vf_loss: 0.0003219229455377596
    num_agent_steps_sampled: 1782000
    num_agent_steps_trained: 1782000
    num_steps_sampled: 1782000
    num_steps_trained: 1782000
  iterations_since_restore: 178

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1782,44840.1,1782000,-0.03,0,-3,339.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1783000
  custom_metrics: {}
  date: 2021-10-09_10-52-24
  done: false
  episode_len_mean: 339.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5036
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.770621489153968
          entropy_coeff: 0.009999999999999998
          kl: 0.013065381678536407
          policy_loss: -0.11473494571530157
          total_loss: -0.1201575035850207
          vf_explained_var: -0.775575578212738
          vf_loss: 0.0002017669569694489
    num_agent_steps_sampled: 1783000
    num_agent_steps_trained: 1783000
    num_steps_sampled: 1783000
    num_steps_trained: 1783000
  iterations_since_restore: 1783
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1783,44864.5,1783000,-0.03,0,-3,339.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1784000
  custom_metrics: {}
  date: 2021-10-09_10-52-49
  done: false
  episode_len_mean: 339.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5039
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.807943704393175
          entropy_coeff: 0.009999999999999998
          kl: 0.011884619545904189
          policy_loss: -0.11621637294689814
          total_loss: -0.12305167271859116
          vf_explained_var: -0.5645466446876526
          vf_loss: 0.00025412894602696825
    num_agent_steps_sampled: 1784000
    num_agent_steps_trained: 1784000
    num_steps_sampled: 1784000
    num_steps_trained: 1784000
  iterations_since_restore: 178

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1784,44889.5,1784000,-0.03,0,-3,339.44




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1785000
  custom_metrics: {}
  date: 2021-10-09_10-53-31
  done: false
  episode_len_mean: 338.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5042
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8271410438749525
          entropy_coeff: 0.009999999999999998
          kl: 0.016094024118370718
          policy_loss: -0.10220479534731972
          total_loss: -0.1053463236325317
          vf_explained_var: -0.05344339460134506
          vf_loss: 0.00024732792040190864
    num_agent_steps_sampled: 1785000
    num_agent_steps_trained: 1785000
    num_steps_sampled: 1785000
    num_steps_trained: 1785000
  iterations_since_restore: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1785,44931.1,1785000,-0.03,0,-3,338.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1786000
  custom_metrics: {}
  date: 2021-10-09_10-53-56
  done: false
  episode_len_mean: 338.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5045
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7090481652153864
          entropy_coeff: 0.009999999999999998
          kl: 0.015695438623809822
          policy_loss: -0.06670464939541287
          total_loss: -0.06909808609634638
          vf_explained_var: -0.6587032675743103
          vf_loss: 0.00018307283907132742
    num_agent_steps_sampled: 1786000
    num_agent_steps_trained: 1786000
    num_steps_sampled: 1786000
    num_steps_trained: 1786000
  iterations_since_restore: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1786,44956,1786000,-0.03,0,-3,338.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1787000
  custom_metrics: {}
  date: 2021-10-09_10-54-21
  done: false
  episode_len_mean: 337.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5048
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.589028126663632
          entropy_coeff: 0.009999999999999998
          kl: 0.009162518925414542
          policy_loss: -0.0515596576862865
          total_loss: -0.05877051614224911
          vf_explained_var: -0.4671226143836975
          vf_loss: 0.00020660775424201144
    num_agent_steps_sampled: 1787000
    num_agent_steps_trained: 1787000
    num_steps_sampled: 1787000
    num_steps_trained: 1787000
  iterations_since_restore: 1787

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1787,44980.8,1787000,-0.03,0,-3,337.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1788000
  custom_metrics: {}
  date: 2021-10-09_10-54-46
  done: false
  episode_len_mean: 336.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5051
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.6276096396976047
          entropy_coeff: 0.009999999999999998
          kl: 0.01505223073777202
          policy_loss: -0.05413805558863613
          total_loss: -0.056377565301954743
          vf_explained_var: 0.008272513747215271
          vf_loss: 0.00011740761155023291
    num_agent_steps_sampled: 1788000
    num_agent_steps_trained: 1788000
    num_steps_sampled: 1788000
    num_steps_trained: 1788000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1788,45006.3,1788000,-0.03,0,-3,336.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1789000
  custom_metrics: {}
  date: 2021-10-09_10-55-08
  done: false
  episode_len_mean: 336.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5054
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7016973124610053
          entropy_coeff: 0.009999999999999998
          kl: 0.007775706287619025
          policy_loss: -0.03343990246454875
          total_loss: -0.04309398432572683
          vf_explained_var: -0.8505271077156067
          vf_loss: 0.00017249490758533485
    num_agent_steps_sampled: 1789000
    num_agent_steps_trained: 1789000
    num_steps_sampled: 1789000
    num_steps_trained: 1789000
  iterations_since_restore: 178

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1789,45028.6,1789000,-0.03,0,-3,336.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1790000
  custom_metrics: {}
  date: 2021-10-09_10-55-34
  done: false
  episode_len_mean: 336.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5057
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.665168163511488
          entropy_coeff: 0.009999999999999998
          kl: 0.01128137071779065
          policy_loss: -0.1686948626405663
          total_loss: -0.17482477869424556
          vf_explained_var: -0.6276447772979736
          vf_loss: 8.959433960424374e-05
    num_agent_steps_sampled: 1790000
    num_agent_steps_trained: 1790000
    num_steps_sampled: 1790000
    num_steps_trained: 1790000
  iterations_since_restore: 1790
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1790,45053.7,1790000,-0.03,0,-3,336.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1791000
  custom_metrics: {}
  date: 2021-10-09_10-55-58
  done: false
  episode_len_mean: 335.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5060
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8157782329453362
          entropy_coeff: 0.009999999999999998
          kl: 0.01231493984871182
          policy_loss: -0.0663799578944842
          total_loss: -0.07308491505682468
          vf_explained_var: -0.5975099205970764
          vf_loss: 6.488884961678802e-05
    num_agent_steps_sampled: 1791000
    num_agent_steps_trained: 1791000
    num_steps_sampled: 1791000
    num_steps_trained: 1791000
  iterations_since_restore: 1791


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1791,45078.2,1791000,-0.03,0,-3,335.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1792000
  custom_metrics: {}
  date: 2021-10-09_10-56-21
  done: false
  episode_len_mean: 335.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 5062
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.724184885289934
          entropy_coeff: 0.009999999999999998
          kl: 0.016247473780212355
          policy_loss: -0.019949416195352874
          total_loss: -0.022107709613111284
          vf_explained_var: -0.30130353569984436
          vf_loss: 5.9102865210055217e-05
    num_agent_steps_sampled: 1792000
    num_agent_steps_trained: 1792000
    num_steps_sampled: 1792000
    num_steps_trained: 1792000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1792,45100.7,1792000,-0.03,0,-3,335.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1793000
  custom_metrics: {}
  date: 2021-10-09_10-56-45
  done: false
  episode_len_mean: 336.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5065
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8761347863409255
          entropy_coeff: 0.009999999999999998
          kl: 0.013617121233076152
          policy_loss: -0.05791233620709843
          total_loss: -0.06403717059228155
          vf_explained_var: -0.5718528032302856
          vf_loss: 4.441568926267791e-05
    num_agent_steps_sampled: 1793000
    num_agent_steps_trained: 1793000
    num_steps_sampled: 1793000
    num_steps_trained: 1793000
  iterations_since_restore: 179

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1793,45124.8,1793000,-0.03,0,-3,336.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1794000
  custom_metrics: {}
  date: 2021-10-09_10-57-08
  done: false
  episode_len_mean: 336.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5068
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7700207458602057
          entropy_coeff: 0.009999999999999998
          kl: 0.014017294799090942
          policy_loss: -0.07450642618868086
          total_loss: -0.07920019556250837
          vf_explained_var: -0.5618352293968201
          vf_loss: 4.4285500916885215e-05
    num_agent_steps_sampled: 1794000
    num_agent_steps_trained: 1794000
    num_steps_sampled: 1794000
    num_steps_trained: 1794000
  iterations_since_restore: 179

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1794,45148.5,1794000,-0.03,0,-3,336.8




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1795000
  custom_metrics: {}
  date: 2021-10-09_10-57-54
  done: false
  episode_len_mean: 335.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 4
  episodes_total: 5072
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7024459759394328
          entropy_coeff: 0.009999999999999998
          kl: 0.014907123269818632
          policy_loss: -0.004543969324893421
          total_loss: -0.007719712580243746
          vf_explained_var: -0.4947223663330078
          vf_loss: 6.372485107325095e-05
    num_agent_steps_sampled: 1795000
    num_agent_steps_trained: 1795000
    num_steps_sampled: 1795000
    num_steps_trained: 1795000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1795,45193.6,1795000,-0.03,0,-3,335.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1796000
  custom_metrics: {}
  date: 2021-10-09_10-58-18
  done: false
  episode_len_mean: 336.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5075
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.6988133973545498
          entropy_coeff: 0.009999999999999998
          kl: 0.009520642127190134
          policy_loss: -0.06758389106641213
          total_loss: -0.07572477420585023
          vf_explained_var: -0.4756735563278198
          vf_loss: 4.327072150166107e-05
    num_agent_steps_sampled: 1796000
    num_agent_steps_trained: 1796000
    num_steps_sampled: 1796000
    num_steps_trained: 1796000
  iterations_since_restore: 179

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1796,45217.7,1796000,-0.03,0,-3,336.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1797000
  custom_metrics: {}
  date: 2021-10-09_10-58-40
  done: false
  episode_len_mean: 337.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 5077
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8710097604327731
          entropy_coeff: 0.009999999999999998
          kl: 0.011219891973053524
          policy_loss: -0.06687976097067197
          total_loss: -0.07517465907666418
          vf_explained_var: -0.6964545845985413
          vf_loss: 3.9880402487647694e-05
    num_agent_steps_sampled: 1797000
    num_agent_steps_trained: 1797000
    num_steps_sampled: 1797000
    num_steps_trained: 1797000
  iterations_since_restore: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1797,45239.8,1797000,-0.03,0,-3,337.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1798000
  custom_metrics: {}
  date: 2021-10-09_10-59-00
  done: false
  episode_len_mean: 339.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 5080
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.5525399618678624
          entropy_coeff: 0.009999999999999998
          kl: 0.008500953199181519
          policy_loss: -0.10008347111029757
          total_loss: -0.10770619871715705
          vf_explained_var: -0.9956970810890198
          vf_loss: 4.162168685676685e-05
    num_agent_steps_sampled: 1798000
    num_agent_steps_trained: 1798000
    num_steps_sampled: 1798000
    num_steps_trained: 1798000
  iterations_since_restore: 1798

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1798,45259.5,1798000,-0.03,0,-3,339.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1799000
  custom_metrics: {}
  date: 2021-10-09_10-59-22
  done: false
  episode_len_mean: 340.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 5082
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8126401490635342
          entropy_coeff: 0.009999999999999998
          kl: 0.007657724789511377
          policy_loss: -0.022113253455609085
          total_loss: -0.033110902406689194
          vf_explained_var: -0.9637641906738281
          vf_loss: 4.7459869953551485e-05
    num_agent_steps_sampled: 1799000
    num_agent_steps_trained: 1799000
    num_steps_sampled: 1799000
    num_steps_trained: 1799000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1799,45281.9,1799000,-0.03,0,-3,340.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1800000
  custom_metrics: {}
  date: 2021-10-09_10-59-45
  done: false
  episode_len_mean: 341.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5085
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7936963438987732
          entropy_coeff: 0.009999999999999998
          kl: 0.01131689982645403
          policy_loss: -0.01947515308856964
          total_loss: 0.1558181706402037
          vf_explained_var: -0.5013915300369263
          vf_loss: 0.18276526757205527
    num_agent_steps_sampled: 1800000
    num_agent_steps_trained: 1800000
    num_steps_sampled: 1800000
    num_steps_trained: 1800000
  iterations_since_restore: 1800
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1800,45305,1800000,-0.12,0,-9,341.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1801000
  custom_metrics: {}
  date: 2021-10-09_11-00-11
  done: false
  episode_len_mean: 341.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5088
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7413033684094747
          entropy_coeff: 0.009999999999999998
          kl: 0.010626521740869421
          policy_loss: -0.08208686009877258
          total_loss: -0.08459391395250956
          vf_explained_var: -0.5641970038414001
          vf_loss: 0.005079366540950206
    num_agent_steps_sampled: 1801000
    num_agent_steps_trained: 1801000
    num_steps_sampled: 1801000
    num_steps_trained: 1801000
  iterations_since_restore: 1801

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1801,45330.4,1801000,-0.12,0,-9,341.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1802000
  custom_metrics: {}
  date: 2021-10-09_11-00-33
  done: false
  episode_len_mean: 340.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5091
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7871563302146063
          entropy_coeff: 0.009999999999999998
          kl: 0.01626461936635448
          policy_loss: -0.05845632538613346
          total_loss: -0.056809044049845804
          vf_explained_var: -0.7659295797348022
          vf_loss: 0.004478538025998407
    num_agent_steps_sampled: 1802000
    num_agent_steps_trained: 1802000
    num_steps_sampled: 1802000
    num_steps_trained: 1802000
  iterations_since_restore: 1802

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1802,45353.2,1802000,-0.12,0,-9,340.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1803000
  custom_metrics: {}
  date: 2021-10-09_11-00-55
  done: false
  episode_len_mean: 342.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5093
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8383070985476175
          entropy_coeff: 0.009999999999999998
          kl: 0.010982327075556499
          policy_loss: -0.06283667824334568
          total_loss: -0.06590185664180252
          vf_explained_var: -0.907958447933197
          vf_loss: 0.005162255736326592
    num_agent_steps_sampled: 1803000
    num_agent_steps_trained: 1803000
    num_steps_sampled: 1803000
    num_steps_trained: 1803000
  iterations_since_restore: 1803


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1803,45375,1803000,-0.12,0,-9,342.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1804000
  custom_metrics: {}
  date: 2021-10-09_11-01-18
  done: false
  episode_len_mean: 343.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5096
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8676896201239692
          entropy_coeff: 0.009999999999999998
          kl: 0.013754421663369874
          policy_loss: -0.07530827532625861
          total_loss: -0.07970431320783165
          vf_explained_var: -0.7564233541488647
          vf_loss: 0.0015617958160065528
    num_agent_steps_sampled: 1804000
    num_agent_steps_trained: 1804000
    num_steps_sampled: 1804000
    num_steps_trained: 1804000
  iterations_since_restore: 180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1804,45398,1804000,-0.12,0,-9,343.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1805000
  custom_metrics: {}
  date: 2021-10-09_11-01-42
  done: false
  episode_len_mean: 342.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5099
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7343934231334261
          entropy_coeff: 0.009999999999999998
          kl: 0.01435432599395531
          policy_loss: -0.06514288089755509
          total_loss: -0.06734074039591684
          vf_explained_var: -0.7461427450180054
          vf_loss: 0.0018722633274996447
    num_agent_steps_sampled: 1805000
    num_agent_steps_trained: 1805000
    num_steps_sampled: 1805000
    num_steps_trained: 1805000
  iterations_since_restore: 1805

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1805,45422.2,1805000,-0.12,0,-9,342.94




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1806000
  custom_metrics: {}
  date: 2021-10-09_11-02-27
  done: false
  episode_len_mean: 344.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5102
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7566300564342074
          entropy_coeff: 0.009999999999999998
          kl: 0.011459323873227194
          policy_loss: -0.09130146631764041
          total_loss: -0.09546578406459755
          vf_explained_var: -0.7246107459068298
          vf_loss: 0.00280525703371192
    num_agent_steps_sampled: 1806000
    num_agent_steps_trained: 1806000
    num_steps_sampled: 1806000
    num_steps_trained: 1806000
  iterations_since_restore: 1806


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1806,45466.6,1806000,-0.12,0,-9,344.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1807000
  custom_metrics: {}
  date: 2021-10-09_11-02-49
  done: false
  episode_len_mean: 344.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5105
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8541808234320747
          entropy_coeff: 0.009999999999999998
          kl: 0.01142040253224788
          policy_loss: -0.10707252495404747
          total_loss: -0.11305971615430381
          vf_explained_var: -0.6989416480064392
          vf_loss: 0.001993880147347227
    num_agent_steps_sampled: 1807000
    num_agent_steps_trained: 1807000
    num_steps_sampled: 1807000
    num_steps_trained: 1807000
  iterations_since_restore: 1807


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1807,45489.1,1807000,-0.12,0,-9,344.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1808000
  custom_metrics: {}
  date: 2021-10-09_11-03-13
  done: false
  episode_len_mean: 344.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5108
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8244307385550604
          entropy_coeff: 0.009999999999999998
          kl: 0.009279770781824128
          policy_loss: -0.02500830946697129
          total_loss: -0.029882509633898734
          vf_explained_var: -0.4893359839916229
          vf_loss: 0.004788866181883754
    num_agent_steps_sampled: 1808000
    num_agent_steps_trained: 1808000
    num_steps_sampled: 1808000
    num_steps_trained: 1808000
  iterations_since_restore: 180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1808,45512.5,1808000,-0.12,0,-9,344.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1809000
  custom_metrics: {}
  date: 2021-10-09_11-03-38
  done: false
  episode_len_mean: 344.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5111
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8284760978486803
          entropy_coeff: 0.009999999999999998
          kl: 0.01330040115985918
          policy_loss: -0.10340968080692821
          total_loss: -0.10813883907265133
          vf_explained_var: -0.4591549336910248
          vf_loss: 0.0012563859959805591
    num_agent_steps_sampled: 1809000
    num_agent_steps_trained: 1809000
    num_steps_sampled: 1809000
    num_steps_trained: 1809000
  iterations_since_restore: 1809

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1809,45537.3,1809000,-0.12,0,-9,344.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1810000
  custom_metrics: {}
  date: 2021-10-09_11-04-03
  done: false
  episode_len_mean: 343.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5114
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7563978870709738
          entropy_coeff: 0.009999999999999998
          kl: 0.0143949356744426
          policy_loss: -0.036098359007802276
          total_loss: -0.039681456672648587
          vf_explained_var: -0.5870763659477234
          vf_loss: 0.0006695200197605623
    num_agent_steps_sampled: 1810000
    num_agent_steps_trained: 1810000
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
  iterations_since_restore: 181

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1810,45562.2,1810000,-0.09,0,-9,343.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1811000
  custom_metrics: {}
  date: 2021-10-09_11-04-27
  done: false
  episode_len_mean: 343.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5117
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7838217086262174
          entropy_coeff: 0.009999999999999998
          kl: 0.012447650350889895
          policy_loss: -0.026830031019118097
          total_loss: -0.0326619116589427
          vf_explained_var: -0.17726173996925354
          vf_loss: 0.0004956806332403277
    num_agent_steps_sampled: 1811000
    num_agent_steps_trained: 1811000
    num_steps_sampled: 1811000
    num_steps_trained: 1811000
  iterations_since_restore: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1811,45586.5,1811000,-0.09,0,-9,343.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1812000
  custom_metrics: {}
  date: 2021-10-09_11-04-50
  done: false
  episode_len_mean: 343.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5120
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8664304004775154
          entropy_coeff: 0.009999999999999998
          kl: 0.012086061798825595
          policy_loss: -0.08581912577566174
          total_loss: -0.09247575646473302
          vf_explained_var: -1.0
          vf_loss: 0.0008313851854634574
    num_agent_steps_sampled: 1812000
    num_agent_steps_trained: 1812000
    num_steps_sampled: 1812000
    num_steps_trained: 1812000
  iterations_since_restore: 1812
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1812,45609.2,1812000,-0.09,0,-9,343.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1813000
  custom_metrics: {}
  date: 2021-10-09_11-05-13
  done: false
  episode_len_mean: 343.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5122
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9247254013931521
          cur_lr: 5.000000000000001e-05
          entropy: 1.786154454284244
          entropy_coeff: 0.009999999999999998
          kl: 0.02603825639856324
          policy_loss: -0.08566286654935942
          total_loss: -0.07682155519723892
          vf_explained_var: 0.3363215923309326
          vf_loss: 0.0026246146344217577
    num_agent_steps_sampled: 1813000
    num_agent_steps_trained: 1813000
    num_steps_sampled: 1813000
    num_steps_trained: 1813000
  iterations_since_restore: 1813
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1813,45632.2,1813000,-0.09,0,-9,343.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1814000
  custom_metrics: {}
  date: 2021-10-09_11-05-35
  done: false
  episode_len_mean: 345.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5125
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9146632870038351
          entropy_coeff: 0.009999999999999998
          kl: 0.010097712000615308
          policy_loss: -0.12433944543202718
          total_loss: -0.1288719872219695
          vf_explained_var: -0.6959725618362427
          vf_loss: 0.0006076762329838756
    num_agent_steps_sampled: 1814000
    num_agent_steps_trained: 1814000
    num_steps_sampled: 1814000
    num_steps_trained: 1814000
  iterations_since_restore: 1814


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1814,45654.4,1814000,-0.09,0,-9,345.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1815000
  custom_metrics: {}
  date: 2021-10-09_11-05-58
  done: false
  episode_len_mean: 346.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5128
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.968575492170122
          entropy_coeff: 0.009999999999999998
          kl: 0.008651310424828257
          policy_loss: -0.09585960424608654
          total_loss: -0.10307205378388365
          vf_explained_var: -0.6473854780197144
          vf_loss: 0.0004731755784531641
    num_agent_steps_sampled: 1815000
    num_agent_steps_trained: 1815000
    num_steps_sampled: 1815000
    num_steps_trained: 1815000
  iterations_since_restore: 1815


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1815,45677.2,1815000,-0.09,0,-9,346.41




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1816000
  custom_metrics: {}
  date: 2021-10-09_11-06-34
  done: false
  episode_len_mean: 347.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5130
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9308427704705133
          entropy_coeff: 0.009999999999999998
          kl: 0.008011263429116176
          policy_loss: -0.08673615412165721
          total_loss: -0.09446325066188971
          vf_explained_var: -0.6203619837760925
          vf_loss: 0.00046900509817836186
    num_agent_steps_sampled: 1816000
    num_agent_steps_trained: 1816000
    num_steps_sampled: 1816000
    num_steps_trained: 1816000
  iterations_since_restore: 1816

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1816,45713.5,1816000,-0.09,0,-9,347.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1817000
  custom_metrics: {}
  date: 2021-10-09_11-06-59
  done: false
  episode_len_mean: 349.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5133
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9452092144224378
          entropy_coeff: 0.009999999999999998
          kl: 0.008905362595231815
          policy_loss: -0.1632391800896989
          total_loss: -0.1699553852279981
          vf_explained_var: -0.8951750993728638
          vf_loss: 0.00038336613150830896
    num_agent_steps_sampled: 1817000
    num_agent_steps_trained: 1817000
    num_steps_sampled: 1817000
    num_steps_trained: 1817000
  iterations_since_restore: 1817


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1817,45738.5,1817000,-0.09,0,-9,349.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1818000
  custom_metrics: {}
  date: 2021-10-09_11-07-21
  done: false
  episode_len_mean: 350.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5135
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9402047687106663
          entropy_coeff: 0.009999999999999998
          kl: 0.008284437890646264
          policy_loss: -0.03939450896448559
          total_loss: -0.04691731139189667
          vf_explained_var: -0.8467994928359985
          vf_loss: 0.000388000653603942
    num_agent_steps_sampled: 1818000
    num_agent_steps_trained: 1818000
    num_steps_sampled: 1818000
    num_steps_trained: 1818000
  iterations_since_restore: 1818


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1818,45760.3,1818000,-0.09,0,-9,350.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1819000
  custom_metrics: {}
  date: 2021-10-09_11-07-48
  done: false
  episode_len_mean: 350.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 4
  episodes_total: 5139
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8238342656029596
          entropy_coeff: 0.009999999999999998
          kl: 0.008717985569222462
          policy_loss: -0.10942107397649023
          total_loss: -0.11536903666953245
          vf_explained_var: -0.7032706141471863
          vf_loss: 0.0001977654266940792
    num_agent_steps_sampled: 1819000
    num_agent_steps_trained: 1819000
    num_steps_sampled: 1819000
    num_steps_trained: 1819000
  iterations_since_restore: 1819

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1819,45787.1,1819000,-0.09,0,-9,350.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1820000
  custom_metrics: {}
  date: 2021-10-09_11-08-12
  done: false
  episode_len_mean: 350.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5141
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.6758206062846714
          entropy_coeff: 0.009999999999999998
          kl: 0.008600280209913751
          policy_loss: -0.07000776116425793
          total_loss: -0.07461642598112424
          vf_explained_var: -0.9998906254768372
          vf_loss: 0.0002201931827585213
    num_agent_steps_sampled: 1820000
    num_agent_steps_trained: 1820000
    num_steps_sampled: 1820000
    num_steps_trained: 1820000
  iterations_since_restore: 1820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1820,45811.7,1820000,-0.09,0,-9,350.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1821000
  custom_metrics: {}
  date: 2021-10-09_11-08-35
  done: false
  episode_len_mean: 352.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5144
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9256541980637445
          entropy_coeff: 0.009999999999999998
          kl: 0.009523534762995419
          policy_loss: -0.1012429014262226
          total_loss: -0.10702252255545722
          vf_explained_var: -0.9433924555778503
          vf_loss: 0.0002669416724529583
    num_agent_steps_sampled: 1821000
    num_agent_steps_trained: 1821000
    num_steps_sampled: 1821000
    num_steps_trained: 1821000
  iterations_since_restore: 1821


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1821,45834.7,1821000,-0.09,0,-9,352.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1822000
  custom_metrics: {}
  date: 2021-10-09_11-09-00
  done: false
  episode_len_mean: 352.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5147
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9884105920791626
          entropy_coeff: 0.009999999999999998
          kl: 0.00834964498176528
          policy_loss: -0.1055204961862829
          total_loss: -0.11364749421676
          vf_explained_var: -1.0
          vf_loss: 0.00017541520428494551
    num_agent_steps_sampled: 1822000
    num_agent_steps_trained: 1822000
    num_steps_sampled: 1822000
    num_steps_trained: 1822000
  iterations_since_restore: 1822
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1822,45859.9,1822000,-0.09,0,-9,352.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1823000
  custom_metrics: {}
  date: 2021-10-09_11-09-24
  done: false
  episode_len_mean: 352.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5150
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8736383120218914
          entropy_coeff: 0.009999999999999998
          kl: 0.009381001038573642
          policy_loss: -0.13530246652662753
          total_loss: -0.14086239313085874
          vf_explained_var: -1.0
          vf_loss: 0.00016418290073892826
    num_agent_steps_sampled: 1823000
    num_agent_steps_trained: 1823000
    num_steps_sampled: 1823000
    num_steps_trained: 1823000
  iterations_since_restore: 1823
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1823,45883.7,1823000,-0.09,0,-9,352.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1824000
  custom_metrics: {}
  date: 2021-10-09_11-09-47
  done: false
  episode_len_mean: 354.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5152
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.941456405321757
          entropy_coeff: 0.009999999999999998
          kl: 0.007528430201662021
          policy_loss: -0.10616825200203392
          total_loss: -0.11498756463535958
          vf_explained_var: -0.88591468334198
          vf_loss: 0.00015265622172996196
    num_agent_steps_sampled: 1824000
    num_agent_steps_trained: 1824000
    num_steps_sampled: 1824000
    num_steps_trained: 1824000
  iterations_since_restore: 1824
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1824,45906.6,1824000,-0.09,0,-9,354.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1825000
  custom_metrics: {}
  date: 2021-10-09_11-10-10
  done: false
  episode_len_mean: 355.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5155
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8600819746653239
          entropy_coeff: 0.009999999999999998
          kl: 0.00880115047872784
          policy_loss: -0.08268140084627602
          total_loss: -0.08896628755351735
          vf_explained_var: -0.8024712204933167
          vf_loss: 0.00010796302683754928
    num_agent_steps_sampled: 1825000
    num_agent_steps_trained: 1825000
    num_steps_sampled: 1825000
    num_steps_trained: 1825000
  iterations_since_restore: 1825

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1825,45929.1,1825000,-0.09,0,-9,355.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1826000
  custom_metrics: {}
  date: 2021-10-09_11-10-34
  done: false
  episode_len_mean: 355.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5158
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.7922575023439196
          entropy_coeff: 0.009999999999999998
          kl: 0.01067774858638069
          policy_loss: -0.04615030008264714
          total_loss: -0.04912821889544527
          vf_explained_var: -0.6284170150756836
          vf_loss: 0.00013368004434192294
    num_agent_steps_sampled: 1826000
    num_agent_steps_trained: 1826000
    num_steps_sampled: 1826000
    num_steps_trained: 1826000
  iterations_since_restore: 1826


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1826,45953,1826000,-0.09,0,-9,355.8




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1827000
  custom_metrics: {}
  date: 2021-10-09_11-11-16
  done: false
  episode_len_mean: 354.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5161
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.839434305826823
          entropy_coeff: 0.009999999999999998
          kl: 0.008764953845086963
          policy_loss: -0.1465965971764591
          total_loss: -0.15274932641122077
          vf_explained_var: -0.7684536576271057
          vf_loss: 8.385148788268756e-05
    num_agent_steps_sampled: 1827000
    num_agent_steps_trained: 1827000
    num_steps_sampled: 1827000
    num_steps_trained: 1827000
  iterations_since_restore: 1827
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1827,45995.6,1827000,-0.09,0,-9,354.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1828000
  custom_metrics: {}
  date: 2021-10-09_11-11-39
  done: false
  episode_len_mean: 356.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5164
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.7414610968695747
          entropy_coeff: 0.009999999999999998
          kl: 0.011167201037883285
          policy_loss: -0.11020580430825551
          total_loss: -0.11202547928939263
          vf_explained_var: -0.5391724705696106
          vf_loss: 0.0001050399221033634
    num_agent_steps_sampled: 1828000
    num_agent_steps_trained: 1828000
    num_steps_sampled: 1828000
    num_steps_trained: 1828000
  iterations_since_restore: 1828

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1828,46018.2,1828000,-0.09,0,-9,356.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1829000
  custom_metrics: {}
  date: 2021-10-09_11-12-01
  done: false
  episode_len_mean: 356.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5166
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9049482054180569
          entropy_coeff: 0.009999999999999998
          kl: 0.00969137083408338
          policy_loss: -0.08337962913016478
          total_loss: -0.08887585182156828
          vf_explained_var: -0.85886549949646
          vf_loss: 0.00011047518912366488
    num_agent_steps_sampled: 1829000
    num_agent_steps_trained: 1829000
    num_steps_sampled: 1829000
    num_steps_trained: 1829000
  iterations_since_restore: 1829
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1829,46040.1,1829000,-0.09,0,-9,356.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1830000
  custom_metrics: {}
  date: 2021-10-09_11-12-21
  done: false
  episode_len_mean: 358.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5168
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8932677083545262
          entropy_coeff: 0.009999999999999998
          kl: 0.009109460690955027
          policy_loss: -0.0602881014554037
          total_loss: -0.06648303725653225
          vf_explained_var: -1.0
          vf_loss: 0.00010211879105352435
    num_agent_steps_sampled: 1830000
    num_agent_steps_trained: 1830000
    num_steps_sampled: 1830000
    num_steps_trained: 1830000
  iterations_since_restore: 1830
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1830,46060.3,1830000,-0.09,0,-9,358.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1831000
  custom_metrics: {}
  date: 2021-10-09_11-12-43
  done: false
  episode_len_mean: 360.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5171
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9646939078966776
          entropy_coeff: 0.009999999999999998
          kl: 0.00972485585929138
          policy_loss: -0.10851265858444903
          total_loss: -0.11458847026030222
          vf_explained_var: -0.7605803608894348
          vf_loss: 8.189514871143425e-05
    num_agent_steps_sampled: 1831000
    num_agent_steps_trained: 1831000
    num_steps_sampled: 1831000
    num_steps_trained: 1831000
  iterations_since_restore: 1831
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1831,46082.7,1831000,-0.09,0,-9,360.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1832000
  custom_metrics: {}
  date: 2021-10-09_11-13-06
  done: false
  episode_len_mean: 363.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5174
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 2.0276771982510886
          entropy_coeff: 0.009999999999999998
          kl: 0.010137132817718155
          policy_loss: -0.11443881479402383
          total_loss: -0.1205682394405206
          vf_explained_var: -0.9288115501403809
          vf_loss: 8.62493516857891e-05
    num_agent_steps_sampled: 1832000
    num_agent_steps_trained: 1832000
    num_steps_sampled: 1832000
    num_steps_trained: 1832000
  iterations_since_restore: 1832
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1832,46105.3,1832000,-0.09,0,-9,363.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1833000
  custom_metrics: {}
  date: 2021-10-09_11-13-29
  done: false
  episode_len_mean: 363.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5176
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.717531645298004
          entropy_coeff: 0.009999999999999998
          kl: 0.006554597686986854
          policy_loss: -0.09699430647823545
          total_loss: -0.10502665402988592
          vf_explained_var: -0.9500670433044434
          vf_loss: 5.116522329949981e-05
    num_agent_steps_sampled: 1833000
    num_agent_steps_trained: 1833000
    num_steps_sampled: 1833000
    num_steps_trained: 1833000
  iterations_since_restore: 1833


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1833,46128,1833000,-0.09,0,-9,363.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1834000
  custom_metrics: {}
  date: 2021-10-09_11-13-50
  done: false
  episode_len_mean: 363.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5179
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9194719195365906
          entropy_coeff: 0.009999999999999998
          kl: 0.009267362356510538
          policy_loss: -0.06800731658521626
          total_loss: -0.07429777772890198
          vf_explained_var: -0.9433871507644653
          vf_loss: 4.960957295325998e-05
    num_agent_steps_sampled: 1834000
    num_agent_steps_trained: 1834000
    num_steps_sampled: 1834000
    num_steps_trained: 1834000
  iterations_since_restore: 1834

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1834,46149.5,1834000,-0.09,0,-9,363.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1835000
  custom_metrics: {}
  date: 2021-10-09_11-14-15
  done: false
  episode_len_mean: 363.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 5182
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9030315385924446
          entropy_coeff: 0.009999999999999998
          kl: 0.010126804254365851
          policy_loss: -0.08093182922651371
          total_loss: -0.08587194948146741
          vf_explained_var: -0.5848329067230225
          vf_loss: 4.34224227824921e-05
    num_agent_steps_sampled: 1835000
    num_agent_steps_trained: 1835000
    num_steps_sampled: 1835000
    num_steps_trained: 1835000
  iterations_since_restore: 1835


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1835,46174.5,1835000,-0.09,0,-9,363.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1836000
  custom_metrics: {}
  date: 2021-10-09_11-14-37
  done: false
  episode_len_mean: 363.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 5184
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9965114805433486
          entropy_coeff: 0.009999999999999998
          kl: 0.012134628532203273
          policy_loss: -0.10553225205383367
          total_loss: -0.10854633000886274
          vf_explained_var: -0.5162275433540344
          vf_loss: 0.00011923875253059022
    num_agent_steps_sampled: 1836000
    num_agent_steps_trained: 1836000
    num_steps_sampled: 1836000
    num_steps_trained: 1836000
  iterations_since_restore: 183

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1836,46195.7,1836000,-0.09,0,-9,363.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1837000
  custom_metrics: {}
  date: 2021-10-09_11-15-00
  done: false
  episode_len_mean: 364.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5187
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8956043773227267
          entropy_coeff: 0.009999999999999998
          kl: 0.009251952792324759
          policy_loss: -0.08965706895622942
          total_loss: -0.09573092682080137
          vf_explained_var: -0.9270561337471008
          vf_loss: 4.891322470131045e-05
    num_agent_steps_sampled: 1837000
    num_agent_steps_trained: 1837000
    num_steps_sampled: 1837000
    num_steps_trained: 1837000
  iterations_since_restore: 1837
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1837,46219.2,1837000,0,0,0,364.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1838000
  custom_metrics: {}
  date: 2021-10-09_11-15-23
  done: false
  episode_len_mean: 365.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5189
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8298288994365268
          entropy_coeff: 0.009999999999999998
          kl: 0.008607832253444395
          policy_loss: -0.09597127633169293
          total_loss: -0.10230197438763247
          vf_explained_var: -0.9178282618522644
          vf_loss: 2.7768879448962658e-05
    num_agent_steps_sampled: 1838000
    num_agent_steps_trained: 1838000
    num_steps_sampled: 1838000
    num_steps_trained: 1838000
  iterations_since_restore: 1838
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1838,46242.4,1838000,0,0,0,365.49




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1839000
  custom_metrics: {}
  date: 2021-10-09_11-16-04
  done: false
  episode_len_mean: 365.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5192
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9199348714616564
          entropy_coeff: 0.009999999999999998
          kl: 0.008447525672004167
          policy_loss: -0.07537079972939359
          total_loss: -0.08281795773655176
          vf_explained_var: -0.6605696082115173
          vf_loss: 3.472889093245612e-05
    num_agent_steps_sampled: 1839000
    num_agent_steps_trained: 1839000
    num_steps_sampled: 1839000
    num_steps_trained: 1839000
  iterations_since_restore: 1839
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1839,46282.7,1839000,0,0,0,365.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1840000
  custom_metrics: {}
  date: 2021-10-09_11-16-28
  done: false
  episode_len_mean: 363.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5195
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8380016340149774
          entropy_coeff: 0.009999999999999998
          kl: 0.007734392191661242
          policy_loss: -0.027825901988479826
          total_loss: -0.03544997351451053
          vf_explained_var: -0.8542184829711914
          vf_loss: 2.76614680438393e-05
    num_agent_steps_sampled: 1840000
    num_agent_steps_trained: 1840000
    num_steps_sampled: 1840000
    num_steps_trained: 1840000
  iterations_since_restore: 1840
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1840,46306.7,1840000,0,0,0,363.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1841000
  custom_metrics: {}
  date: 2021-10-09_11-16-50
  done: false
  episode_len_mean: 364.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5198
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9758854932255214
          entropy_coeff: 0.009999999999999998
          kl: 0.008550082240268923
          policy_loss: -0.13704880813343656
          total_loss: -0.14492891009690034
          vf_explained_var: -0.9602720737457275
          vf_loss: 1.9036014044912818e-05
    num_agent_steps_sampled: 1841000
    num_agent_steps_trained: 1841000
    num_steps_sampled: 1841000
    num_steps_trained: 1841000
  iterations_since_restore: 1841
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1841,46329,1841000,0,0,0,364.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1842000
  custom_metrics: {}
  date: 2021-10-09_11-17-11
  done: false
  episode_len_mean: 366.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5200
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9945640312300787
          entropy_coeff: 0.009999999999999998
          kl: 0.009418298315349698
          policy_loss: -0.14825390833947394
          total_loss: -0.15511107668280602
          vf_explained_var: -0.7307921648025513
          vf_loss: 2.446190859094107e-05
    num_agent_steps_sampled: 1842000
    num_agent_steps_trained: 1842000
    num_steps_sampled: 1842000
    num_steps_trained: 1842000
  iterations_since_restore: 1842
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1842,46349.9,1842000,0,0,0,366.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1843000
  custom_metrics: {}
  date: 2021-10-09_11-17-32
  done: false
  episode_len_mean: 368.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5203
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9035426086849636
          entropy_coeff: 0.009999999999999998
          kl: 0.009193106426843024
          policy_loss: -0.1361259741915597
          total_loss: -0.14237943871153724
          vf_explained_var: -0.7146841287612915
          vf_loss: 3.0315346318982644e-05
    num_agent_steps_sampled: 1843000
    num_agent_steps_trained: 1843000
    num_steps_sampled: 1843000
    num_steps_trained: 1843000
  iterations_since_restore: 1843
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1843,46371.4,1843000,0,0,0,368.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1844000
  custom_metrics: {}
  date: 2021-10-09_11-17-53
  done: false
  episode_len_mean: 369.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5205
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9710917512575785
          entropy_coeff: 0.009999999999999998
          kl: 0.009319800598747385
          policy_loss: -0.059782016277313235
          total_loss: -0.06653490705001686
          vf_explained_var: -0.7921186089515686
          vf_loss: 3.064383955562435e-05
    num_agent_steps_sampled: 1844000
    num_agent_steps_trained: 1844000
    num_steps_sampled: 1844000
    num_steps_trained: 1844000
  iterations_since_restore: 1844
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1844,46392.4,1844000,0,0,0,369.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1845000
  custom_metrics: {}
  date: 2021-10-09_11-18-18
  done: false
  episode_len_mean: 369.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5208
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9644787629445395
          entropy_coeff: 0.009999999999999998
          kl: 0.011547459906307268
          policy_loss: -0.1016286161624723
          total_loss: -0.10524053699854348
          vf_explained_var: -0.28960391879081726
          vf_loss: 1.552276664571966e-05
    num_agent_steps_sampled: 1845000
    num_agent_steps_trained: 1845000
    num_steps_sampled: 1845000
    num_steps_trained: 1845000
  iterations_since_restore: 1845
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1845,46417.4,1845000,0,0,0,369.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1846000
  custom_metrics: {}
  date: 2021-10-09_11-18-44
  done: false
  episode_len_mean: 368.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5211
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.771852477391561
          entropy_coeff: 0.009999999999999998
          kl: 0.008208505811509554
          policy_loss: -0.08564619964195622
          total_loss: -0.0919637201146947
          vf_explained_var: -0.8084399700164795
          vf_loss: 1.5085485008765116e-05
    num_agent_steps_sampled: 1846000
    num_agent_steps_trained: 1846000
    num_steps_sampled: 1846000
    num_steps_trained: 1846000
  iterations_since_restore: 1846
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1846,46443,1846000,0,0,0,368.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1847000
  custom_metrics: {}
  date: 2021-10-09_11-19-09
  done: false
  episode_len_mean: 369.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5214
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.798765359984504
          entropy_coeff: 0.009999999999999998
          kl: 0.009985673426910363
          policy_loss: -0.12332915022141404
          total_loss: -0.1274456083153685
          vf_explained_var: -0.7196337580680847
          vf_loss: 2.0184188679195357e-05
    num_agent_steps_sampled: 1847000
    num_agent_steps_trained: 1847000
    num_steps_sampled: 1847000
    num_steps_trained: 1847000
  iterations_since_restore: 1847
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1847,46467.7,1847000,0,0,0,369.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1848000
  custom_metrics: {}
  date: 2021-10-09_11-19-35
  done: false
  episode_len_mean: 368.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5217
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.7163272023200988
          entropy_coeff: 0.009999999999999998
          kl: 0.008616662871150647
          policy_loss: -0.10179859159721269
          total_loss: -0.10699874858061473
          vf_explained_var: -0.4271581470966339
          vf_loss: 1.1044918523516066e-05
    num_agent_steps_sampled: 1848000
    num_agent_steps_trained: 1848000
    num_steps_sampled: 1848000
    num_steps_trained: 1848000
  iterations_since_restore: 1848
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1848,46493.8,1848000,0,0,0,368.84




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1849000
  custom_metrics: {}
  date: 2021-10-09_11-20-16
  done: false
  episode_len_mean: 368.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5220
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 2.030099942949083
          entropy_coeff: 0.009999999999999998
          kl: 0.011053554473563527
          policy_loss: -0.1012064777314663
          total_loss: -0.10616118167009618
          vf_explained_var: -0.9744696021080017
          vf_loss: 1.4039322635047331e-05
    num_agent_steps_sampled: 1849000
    num_agent_steps_trained: 1849000
    num_steps_sampled: 1849000
    num_steps_trained: 1849000
  iterations_since_restore: 1849
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1849,46534.6,1849000,0,0,0,368.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1850000
  custom_metrics: {}
  date: 2021-10-09_11-20-37
  done: false
  episode_len_mean: 370.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5222
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8762220568127101
          entropy_coeff: 0.009999999999999998
          kl: 0.009229023168919485
          policy_loss: -0.06475061455534564
          total_loss: -0.07067299928102229
          vf_explained_var: -0.8877277970314026
          vf_loss: 3.836704565603416e-05
    num_agent_steps_sampled: 1850000
    num_agent_steps_trained: 1850000
    num_steps_sampled: 1850000
    num_steps_trained: 1850000
  iterations_since_restore: 1850
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1850,46555.8,1850000,0,0,0,370.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1851000
  custom_metrics: {}
  date: 2021-10-09_11-21-01
  done: false
  episode_len_mean: 369.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5225
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8318740102979871
          entropy_coeff: 0.009999999999999998
          kl: 0.012756709414966177
          policy_loss: -0.07197703116883834
          total_loss: -0.07257527771095434
          vf_explained_var: -0.1798362284898758
          vf_loss: 2.581244960108759e-05
    num_agent_steps_sampled: 1851000
    num_agent_steps_trained: 1851000
    num_steps_sampled: 1851000
    num_steps_trained: 1851000
  iterations_since_restore: 1851
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1851,46579.9,1851000,0,0,0,369.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1852000
  custom_metrics: {}
  date: 2021-10-09_11-21-25
  done: false
  episode_len_mean: 369.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5228
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.948354552851783
          entropy_coeff: 0.009999999999999998
          kl: 0.011395180235933427
          policy_loss: -0.14648129103912247
          total_loss: -0.15014064030514823
          vf_explained_var: -0.46819353103637695
          vf_loss: 1.808066740522918e-05
    num_agent_steps_sampled: 1852000
    num_agent_steps_trained: 1852000
    num_steps_sampled: 1852000
    num_steps_trained: 1852000
  iterations_since_restore: 1852
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1852,46603.6,1852000,0,0,0,369.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1853000
  custom_metrics: {}
  date: 2021-10-09_11-21-50
  done: false
  episode_len_mean: 368.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5231
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.8941682193014358
          entropy_coeff: 0.009999999999999998
          kl: 0.009951854189158895
          policy_loss: -0.060740124641193285
          total_loss: -0.06586562916636467
          vf_explained_var: -0.45538872480392456
          vf_loss: 1.2081937423621033e-05
    num_agent_steps_sampled: 1853000
    num_agent_steps_trained: 1853000
    num_steps_sampled: 1853000
    num_steps_trained: 1853000
  iterations_since_restore: 1853

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1853,46628.4,1853000,0,0,0,368.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1854000
  custom_metrics: {}
  date: 2021-10-09_11-22-13
  done: false
  episode_len_mean: 367.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5234
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9887476099861994
          entropy_coeff: 0.009999999999999998
          kl: 0.008771378131137217
          policy_loss: -0.07037271945219901
          total_loss: -0.07808606453860799
          vf_explained_var: -0.7828706502914429
          vf_loss: 7.4581657195772604e-06
    num_agent_steps_sampled: 1854000
    num_agent_steps_trained: 1854000
    num_steps_sampled: 1854000
    num_steps_trained: 1854000
  iterations_since_restore: 1854
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1854,46652,1854000,0,0,0,367.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1855000
  custom_metrics: {}
  date: 2021-10-09_11-22-37
  done: false
  episode_len_mean: 367.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5236
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9561924086676703
          entropy_coeff: 0.009999999999999998
          kl: 0.008833132329229788
          policy_loss: -0.10192822346256838
          total_loss: -0.10923139072126813
          vf_explained_var: -0.24153755605220795
          vf_loss: 6.4267956076946575e-06
    num_agent_steps_sampled: 1855000
    num_agent_steps_trained: 1855000
    num_steps_sampled: 1855000
    num_steps_trained: 1855000
  iterations_since_restore: 1855


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1855,46676.2,1855000,0,0,0,367.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1856000
  custom_metrics: {}
  date: 2021-10-09_11-23-01
  done: false
  episode_len_mean: 368.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5239
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.94638501935535
          entropy_coeff: 0.009999999999999998
          kl: 0.009388090547798218
          policy_loss: -0.11496172746022543
          total_loss: -0.121394056495693
          vf_explained_var: -0.41907191276550293
          vf_loss: 9.411322394549239e-06
    num_agent_steps_sampled: 1856000
    num_agent_steps_trained: 1856000
    num_steps_sampled: 1856000
    num_steps_trained: 1856000
  iterations_since_restore: 1856
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1856,46699.9,1856000,0,0,0,368.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1857000
  custom_metrics: {}
  date: 2021-10-09_11-23-24
  done: false
  episode_len_mean: 368.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5242
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.7873316446940104
          entropy_coeff: 0.009999999999999998
          kl: 0.009546530299082631
          policy_loss: -0.08051808811724186
          total_loss: -0.08512973686059316
          vf_explained_var: -0.6879231333732605
          vf_loss: 1.9789598703331042e-05
    num_agent_steps_sampled: 1857000
    num_agent_steps_trained: 1857000
    num_steps_sampled: 1857000
    num_steps_trained: 1857000
  iterations_since_restore: 1857
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1857,46722.8,1857000,0,0,0,368.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1858000
  custom_metrics: {}
  date: 2021-10-09_11-23-48
  done: false
  episode_len_mean: 369.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5245
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.387088102089729
          cur_lr: 5.000000000000001e-05
          entropy: 1.9080823792351618
          entropy_coeff: 0.009999999999999998
          kl: 0.0020123438757046587
          policy_loss: -0.20788239853249657
          total_loss: -0.22415618035528395
          vf_explained_var: -0.5486031770706177
          vf_loss: 1.574267918537468e-05
    num_agent_steps_sampled: 1858000
    num_agent_steps_trained: 1858000
    num_steps_sampled: 1858000
    num_steps_trained: 1858000
  iterations_since_restore: 1858

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1858,46746.3,1858000,-0.01,0,-1,369


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1859000
  custom_metrics: {}
  date: 2021-10-09_11-24-12
  done: false
  episode_len_mean: 369.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5247
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.704929678969913
          entropy_coeff: 0.009999999999999998
          kl: 0.011294341666194758
          policy_loss: -0.06832333124346203
          total_loss: -0.07752624690118763
          vf_explained_var: -0.9633907079696655
          vf_loss: 1.325810763369696e-05
    num_agent_steps_sampled: 1859000
    num_agent_steps_trained: 1859000
    num_steps_sampled: 1859000
    num_steps_trained: 1859000
  iterations_since_restore: 1859

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1859,46771,1859000,-0.01,0,-1,369.18




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1860000
  custom_metrics: {}
  date: 2021-10-09_11-24-52
  done: false
  episode_len_mean: 368.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5250
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9011176043086582
          entropy_coeff: 0.009999999999999998
          kl: 0.013991688416377497
          policy_loss: -0.06969218011945486
          total_loss: -0.07899078519807921
          vf_explained_var: -0.18319633603096008
          vf_loss: 8.71695663893964e-06
    num_agent_steps_sampled: 1860000
    num_agent_steps_trained: 1860000
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
  iterations_since_restore: 186

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1860,46811,1860000,-0.01,0,-1,368.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1861000
  custom_metrics: {}
  date: 2021-10-09_11-25-14
  done: false
  episode_len_mean: 369.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5253
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8167084760136074
          entropy_coeff: 0.009999999999999998
          kl: 0.014784953721688875
          policy_loss: -0.04294010727769799
          total_loss: -0.05083204358816147
          vf_explained_var: -0.5007151961326599
          vf_loss: 2.1132407639217693e-05
    num_agent_steps_sampled: 1861000
    num_agent_steps_trained: 1861000
    num_steps_sampled: 1861000
    num_steps_trained: 1861000
  iterations_since_restore: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1861,46832.8,1861000,-0.01,0,-1,369.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1862000
  custom_metrics: {}
  date: 2021-10-09_11-25-39
  done: false
  episode_len_mean: 368.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5255
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.7951981213357713
          entropy_coeff: 0.009999999999999998
          kl: 0.014344840093938613
          policy_loss: -0.11876549604866239
          total_loss: -0.12675928874976106
          vf_explained_var: -0.11451755464076996
          vf_loss: 9.409632449812003e-06
    num_agent_steps_sampled: 1862000
    num_agent_steps_trained: 1862000
    num_steps_sampled: 1862000
    num_steps_trained: 1862000
  iterations_since_restore: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1862,46857.6,1862000,-0.01,0,-1,368.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1863000
  custom_metrics: {}
  date: 2021-10-09_11-26-02
  done: false
  episode_len_mean: 369.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5258
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.7432585159937541
          entropy_coeff: 0.009999999999999998
          kl: 0.014859408004619191
          policy_loss: -0.0719394851062033
          total_loss: -0.0789581701780359
          vf_explained_var: -0.985410749912262
          vf_loss: 0.00010824622534831482
    num_agent_steps_sampled: 1863000
    num_agent_steps_trained: 1863000
    num_steps_sampled: 1863000
    num_steps_trained: 1863000
  iterations_since_restore: 1863


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1863,46880.5,1863000,-0.01,0,-1,369.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1864000
  custom_metrics: {}
  date: 2021-10-09_11-26-28
  done: false
  episode_len_mean: 369.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5261
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9669297046131557
          entropy_coeff: 0.009999999999999998
          kl: 0.012257089430613346
          policy_loss: -0.08862291239202022
          total_loss: -0.09977860086494023
          vf_explained_var: -0.3767033517360687
          vf_loss: 1.2777054371326812e-05
    num_agent_steps_sampled: 1864000
    num_agent_steps_trained: 1864000
    num_steps_sampled: 1864000
    num_steps_trained: 1864000
  iterations_since_restore: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1864,46906.3,1864000,-0.01,0,-1,369.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1865000
  custom_metrics: {}
  date: 2021-10-09_11-26-52
  done: false
  episode_len_mean: 368.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5264
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.7542550206184386
          entropy_coeff: 0.009999999999999998
          kl: 0.013456096017880092
          policy_loss: -0.0774244782825311
          total_loss: -0.0854544406135877
          vf_explained_var: -0.9850612282752991
          vf_loss: 0.00018018977187037106
    num_agent_steps_sampled: 1865000
    num_agent_steps_trained: 1865000
    num_steps_sampled: 1865000
    num_steps_trained: 1865000
  iterations_since_restore: 1865

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1865,46930.8,1865000,-0.01,0,-1,368.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1866000
  custom_metrics: {}
  date: 2021-10-09_11-27-16
  done: false
  episode_len_mean: 367.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5267
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8379531224568686
          entropy_coeff: 0.009999999999999998
          kl: 0.014862140655896312
          policy_loss: -0.009355784414543046
          total_loss: -0.017405164076222314
          vf_explained_var: -0.14206989109516144
          vf_loss: 2.2601095573312098e-05
    num_agent_steps_sampled: 1866000
    num_agent_steps_trained: 1866000
    num_steps_sampled: 1866000
    num_steps_trained: 1866000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1866,46954.2,1866000,-0.01,0,-1,367.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1867000
  custom_metrics: {}
  date: 2021-10-09_11-27-41
  done: false
  episode_len_mean: 364.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5270
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.639784202310774
          entropy_coeff: 0.009999999999999998
          kl: 0.01155629051977919
          policy_loss: -0.08603317667212751
          total_loss: -0.0944098992066251
          vf_explained_var: -0.5095383524894714
          vf_loss: 6.321867032157671e-06
    num_agent_steps_sampled: 1867000
    num_agent_steps_trained: 1867000
    num_steps_sampled: 1867000
    num_steps_trained: 1867000
  iterations_since_restore: 1867
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1867,46979.4,1867000,-0.01,0,-1,364.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1868000
  custom_metrics: {}
  date: 2021-10-09_11-28-07
  done: false
  episode_len_mean: 363.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5273
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.6908807516098023
          entropy_coeff: 0.009999999999999998
          kl: 0.010752991757551083
          policy_loss: -0.08538160673860047
          total_loss: -0.0947906607348058
          vf_explained_var: -0.4986304044723511
          vf_loss: 4.207804192649039e-05
    num_agent_steps_sampled: 1868000
    num_agent_steps_trained: 1868000
    num_steps_sampled: 1868000
    num_steps_trained: 1868000
  iterations_since_restore: 1868

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1868,47005.3,1868000,-0.01,0,-1,363.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1869000
  custom_metrics: {}
  date: 2021-10-09_11-28-31
  done: false
  episode_len_mean: 362.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5276
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.910354697704315
          entropy_coeff: 0.009999999999999998
          kl: 0.018047910310686435
          policy_loss: -0.04072744400343961
          total_loss: -0.04728133694993125
          vf_explained_var: 0.006911652162671089
          vf_loss: 3.263193388597251e-05
    num_agent_steps_sampled: 1869000
    num_agent_steps_trained: 1869000
    num_steps_sampled: 1869000
    num_steps_trained: 1869000
  iterations_since_restore: 186

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1869,47029,1869000,-0.01,0,-1,362.97


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1870000
  custom_metrics: {}
  date: 2021-10-09_11-28-55
  done: false
  episode_len_mean: 361.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5278
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.949172932571835
          entropy_coeff: 0.009999999999999998
          kl: 0.012219698002648174
          policy_loss: -0.1118239982260598
          total_loss: -0.1228303007574545
          vf_explained_var: -0.5333734154701233
          vf_loss: 1.052850647687996e-05
    num_agent_steps_sampled: 1870000
    num_agent_steps_trained: 1870000
    num_steps_sampled: 1870000
    num_steps_trained: 1870000
  iterations_since_restore: 1870
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1870,47053.1,1870000,-0.01,0,-1,361.69




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1871000
  custom_metrics: {}
  date: 2021-10-09_11-29-35
  done: false
  episode_len_mean: 361.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5281
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.869127212630378
          entropy_coeff: 0.009999999999999998
          kl: 0.01621060197153478
          policy_loss: 0.0017384521870149505
          total_loss: -0.00569534937126769
          vf_explained_var: -0.3398103415966034
          vf_loss: 1.4705110551302722e-05
    num_agent_steps_sampled: 1871000
    num_agent_steps_trained: 1871000
    num_steps_sampled: 1871000
    num_steps_trained: 1871000
  iterations_since_restore: 187

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1871,47093.1,1871000,-0.01,0,-1,361.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1872000
  custom_metrics: {}
  date: 2021-10-09_11-29-58
  done: false
  episode_len_mean: 360.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5284
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 2.017224219110277
          entropy_coeff: 0.009999999999999998
          kl: 0.016242772804304174
          policy_loss: -0.17456627430187333
          total_loss: -0.18346473202109337
          vf_explained_var: 0.11889582872390747
          vf_loss: 8.704760812684576e-06
    num_agent_steps_sampled: 1872000
    num_agent_steps_trained: 1872000
    num_steps_sampled: 1872000
    num_steps_trained: 1872000
  iterations_since_restore: 1872

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1872,47116.8,1872000,-0.01,0,-1,360.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1873000
  custom_metrics: {}
  date: 2021-10-09_11-30-22
  done: false
  episode_len_mean: 360.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5287
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8905503471692404
          entropy_coeff: 0.009999999999999998
          kl: 0.011672485707355426
          policy_loss: -0.03900364898145199
          total_loss: -0.04980596713721752
          vf_explained_var: -0.9897376894950867
          vf_loss: 7.801589807362083e-06
    num_agent_steps_sampled: 1873000
    num_agent_steps_trained: 1873000
    num_steps_sampled: 1873000
    num_steps_trained: 1873000
  iterations_since_restore: 187

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1873,47140.3,1873000,-0.01,0,-1,360.46


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1874000
  custom_metrics: {}
  date: 2021-10-09_11-30-46
  done: false
  episode_len_mean: 360.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5289
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9942765328619214
          entropy_coeff: 0.009999999999999998
          kl: 0.0099954017972128
          policy_loss: -0.0385099096223712
          total_loss: -0.05150806645138396
          vf_explained_var: -0.6343755722045898
          vf_loss: 1.2357125190950077e-05
    num_agent_steps_sampled: 1874000
    num_agent_steps_trained: 1874000
    num_steps_sampled: 1874000
    num_steps_trained: 1874000
  iterations_since_restore: 1874


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1874,47164.7,1874000,-0.01,0,-1,360.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1875000
  custom_metrics: {}
  date: 2021-10-09_11-31-08
  done: false
  episode_len_mean: 361.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5292
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8991998500294156
          entropy_coeff: 0.009999999999999998
          kl: 0.014663014356782167
          policy_loss: -0.026091619374023545
          total_loss: -0.03490929827094078
          vf_explained_var: -0.243347629904747
          vf_loss: 4.872918316323193e-06
    num_agent_steps_sampled: 1875000
    num_agent_steps_trained: 1875000
    num_steps_sampled: 1875000
    num_steps_trained: 1875000
  iterations_since_restore: 187

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1875,47186.8,1875000,-0.01,0,-1,361.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1876000
  custom_metrics: {}
  date: 2021-10-09_11-31-30
  done: false
  episode_len_mean: 361.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5294
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9912209563785128
          entropy_coeff: 0.009999999999999998
          kl: 0.011726603520009643
          policy_loss: -0.06884183086868789
          total_loss: -0.0806150360032916
          vf_explained_var: -0.6280021071434021
          vf_loss: 6.087726814661841e-06
    num_agent_steps_sampled: 1876000
    num_agent_steps_trained: 1876000
    num_steps_sampled: 1876000
    num_steps_trained: 1876000
  iterations_since_restore: 1876

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1876,47208.1,1876000,-0.01,0,-1,361.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1877000
  custom_metrics: {}
  date: 2021-10-09_11-31-55
  done: false
  episode_len_mean: 361.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5297
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.7245351884100173
          entropy_coeff: 0.009999999999999998
          kl: 0.009963650108055788
          policy_loss: -0.00013345543088184462
          total_loss: -0.010433753600551023
          vf_explained_var: -0.9481655359268188
          vf_loss: 3.4822115301519966e-05
    num_agent_steps_sampled: 1877000
    num_agent_steps_trained: 1877000
    num_steps_sampled: 1877000
    num_steps_trained: 1877000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1877,47233.2,1877000,-0.01,0,-1,361.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1878000
  custom_metrics: {}
  date: 2021-10-09_11-32-17
  done: false
  episode_len_mean: 361.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5300
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9526390353838603
          entropy_coeff: 0.009999999999999998
          kl: 0.013626270887692084
          policy_loss: -0.13005799829132028
          total_loss: -0.14012381691071724
          vf_explained_var: -0.44899871945381165
          vf_loss: 1.0154840487707083e-05
    num_agent_steps_sampled: 1878000
    num_agent_steps_trained: 1878000
    num_steps_sampled: 1878000
    num_steps_trained: 1878000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1878,47255.6,1878000,-0.01,0,-1,361.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1879000
  custom_metrics: {}
  date: 2021-10-09_11-32-43
  done: false
  episode_len_mean: 359.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5303
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 2.0494518518447875
          entropy_coeff: 0.009999999999999998
          kl: 0.01615078084956036
          policy_loss: -0.1427095069239537
          total_loss: -0.15199851389560434
          vf_explained_var: -0.7792857885360718
          vf_loss: 4.234808048093125e-06
    num_agent_steps_sampled: 1879000
    num_agent_steps_trained: 1879000
    num_steps_sampled: 1879000
    num_steps_trained: 1879000
  iterations_since_restore: 1879


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1879,47281.3,1879000,-0.01,0,-1,359.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1880000
  custom_metrics: {}
  date: 2021-10-09_11-33-07
  done: false
  episode_len_mean: 357.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5306
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.898286227385203
          entropy_coeff: 0.009999999999999998
          kl: 0.01530627533079015
          policy_loss: -0.09023861140012741
          total_loss: -0.09859713783694637
          vf_explained_var: -0.09419095516204834
          vf_loss: 8.755771594021806e-06
    num_agent_steps_sampled: 1880000
    num_agent_steps_trained: 1880000
    num_steps_sampled: 1880000
    num_steps_trained: 1880000
  iterations_since_restore: 1880

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1880,47305.6,1880000,-0.01,0,-1,357.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1881000
  custom_metrics: {}
  date: 2021-10-09_11-33-32
  done: false
  episode_len_mean: 357.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5309
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.831657275888655
          entropy_coeff: 0.009999999999999998
          kl: 0.01303628831797376
          policy_loss: -0.1227024406608608
          total_loss: -0.1319696244266298
          vf_explained_var: -0.2596887946128845
          vf_loss: 8.148089927494261e-06
    num_agent_steps_sampled: 1881000
    num_agent_steps_trained: 1881000
    num_steps_sampled: 1881000
    num_steps_trained: 1881000
  iterations_since_restore: 1881
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1881,47329.8,1881000,-0.01,0,-1,357.98




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1882000
  custom_metrics: {}
  date: 2021-10-09_11-34-11
  done: false
  episode_len_mean: 358.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5311
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8879803895950318
          entropy_coeff: 0.009999999999999998
          kl: 0.011307134969439758
          policy_loss: -0.03640512060374022
          total_loss: -0.04743625971799095
          vf_explained_var: -0.2374507486820221
          vf_loss: 6.669335933414307e-06
    num_agent_steps_sampled: 1882000
    num_agent_steps_trained: 1882000
    num_steps_sampled: 1882000
    num_steps_trained: 1882000
  iterations_since_restore: 188

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1882,47369.4,1882000,-0.01,0,-1,358.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1883000
  custom_metrics: {}
  date: 2021-10-09_11-34-33
  done: false
  episode_len_mean: 360.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5314
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 2.0246849126285977
          entropy_coeff: 0.009999999999999998
          kl: 0.016436871866053217
          policy_loss: -0.10822242548068364
          total_loss: -0.11706532939440674
          vf_explained_var: -0.1931808739900589
          vf_loss: 4.252675723313991e-06
    num_agent_steps_sampled: 1883000
    num_agent_steps_trained: 1883000
    num_steps_sampled: 1883000
    num_steps_trained: 1883000
  iterations_since_restore: 188

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1883,47391.7,1883000,-0.01,0,-1,360.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1884000
  custom_metrics: {}
  date: 2021-10-09_11-34-54
  done: false
  episode_len_mean: 361.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5316
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9815977891286214
          entropy_coeff: 0.009999999999999998
          kl: 0.012699080810186914
          policy_loss: -0.09061143944660822
          total_loss: -0.10161483449240526
          vf_explained_var: -0.5357164144515991
          vf_loss: 5.212648934755432e-06
    num_agent_steps_sampled: 1884000
    num_agent_steps_trained: 1884000
    num_steps_sampled: 1884000
    num_steps_trained: 1884000
  iterations_since_restore: 188

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1884,47412.1,1884000,-0.01,0,-1,361.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1885000
  custom_metrics: {}
  date: 2021-10-09_11-35-17
  done: false
  episode_len_mean: 361.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5319
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 2.0177748017840917
          entropy_coeff: 0.009999999999999998
          kl: 0.01409306761805327
          policy_loss: -0.05481660979696446
          total_loss: -0.06521322013189396
          vf_explained_var: -0.4803771674633026
          vf_loss: 6.975309949868662e-06
    num_agent_steps_sampled: 1885000
    num_agent_steps_trained: 1885000
    num_steps_sampled: 1885000
    num_steps_trained: 1885000
  iterations_since_restore: 1885

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1885,47435.4,1885000,-0.01,0,-1,361.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1886000
  custom_metrics: {}
  date: 2021-10-09_11-35-39
  done: false
  episode_len_mean: 361.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5322
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.974852446715037
          entropy_coeff: 0.009999999999999998
          kl: 0.017620891283444608
          policy_loss: -0.11047440710374051
          total_loss: -0.11799217358024584
          vf_explained_var: -0.1738748848438263
          vf_loss: 9.895296557260735e-06
    num_agent_steps_sampled: 1886000
    num_agent_steps_trained: 1886000
    num_steps_sampled: 1886000
    num_steps_trained: 1886000
  iterations_since_restore: 1886

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1886,47457,1886000,-0.01,0,-1,361.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1887000
  custom_metrics: {}
  date: 2021-10-09_11-36-02
  done: false
  episode_len_mean: 362.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5325
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9390661001205445
          entropy_coeff: 0.009999999999999998
          kl: 0.015157083340056863
          policy_loss: -0.06346767466101381
          total_loss: -0.0723411084463199
          vf_explained_var: -0.36520013213157654
          vf_loss: 5.120482223396216e-06
    num_agent_steps_sampled: 1887000
    num_agent_steps_trained: 1887000
    num_steps_sampled: 1887000
    num_steps_trained: 1887000
  iterations_since_restore: 1887

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1887,47480.1,1887000,-0.01,0,-1,362.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1888000
  custom_metrics: {}
  date: 2021-10-09_11-36-26
  done: false
  episode_len_mean: 362.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5327
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9272801399230957
          entropy_coeff: 0.009999999999999998
          kl: 0.013125895033379149
          policy_loss: -0.07820168112715085
          total_loss: -0.08835672912084394
          vf_explained_var: -0.6803299188613892
          vf_loss: 1.4365633261250979e-05
    num_agent_steps_sampled: 1888000
    num_agent_steps_trained: 1888000
    num_steps_sampled: 1888000
    num_steps_trained: 1888000
  iterations_since_restore: 188

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1888,47503.8,1888000,-0.01,0,-1,362.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1889000
  custom_metrics: {}
  date: 2021-10-09_11-36-48
  done: false
  episode_len_mean: 361.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5330
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9472251693407694
          entropy_coeff: 0.009999999999999998
          kl: 0.014745746965333397
          policy_loss: -0.09595688461429543
          total_loss: -0.10515695390188032
          vf_explained_var: -0.5431771278381348
          vf_loss: 4.535646899562885e-05
    num_agent_steps_sampled: 1889000
    num_agent_steps_trained: 1889000
    num_steps_sampled: 1889000
    num_steps_trained: 1889000
  iterations_since_restore: 188

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1889,47525.9,1889000,-0.01,0,-1,361.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1890000
  custom_metrics: {}
  date: 2021-10-09_11-37-10
  done: false
  episode_len_mean: 363.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5333
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9019295069906446
          entropy_coeff: 0.009999999999999998
          kl: 0.018598196966785233
          policy_loss: -0.060552944035993685
          total_loss: -0.06660349513921472
          vf_explained_var: -0.9813178181648254
          vf_loss: 7.007670032079557e-05
    num_agent_steps_sampled: 1890000
    num_agent_steps_trained: 1890000
    num_steps_sampled: 1890000
    num_steps_trained: 1890000
  iterations_since_restore: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1890,47548.1,1890000,-0.01,0,-1,363.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1891000
  custom_metrics: {}
  date: 2021-10-09_11-37-34
  done: false
  episode_len_mean: 362.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5336
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.7650223122702704
          entropy_coeff: 0.009999999999999998
          kl: 0.017765805289759365
          policy_loss: -0.09635866100175514
          total_loss: -0.10166607023113304
          vf_explained_var: 0.11407267302274704
          vf_loss: 2.1444517286302775e-05
    num_agent_steps_sampled: 1891000
    num_agent_steps_trained: 1891000
    num_steps_sampled: 1891000
    num_steps_trained: 1891000
  iterations_since_restore: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1891,47572.2,1891000,-0.01,0,-1,362.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1892000
  custom_metrics: {}
  date: 2021-10-09_11-37-59
  done: false
  episode_len_mean: 361.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5339
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.685216153992547
          entropy_coeff: 0.009999999999999998
          kl: 0.013289134219471954
          policy_loss: -0.051640160837107235
          total_loss: -0.059187888643807834
          vf_explained_var: -0.9211329221725464
          vf_loss: 8.783389442517849e-05
    num_agent_steps_sampled: 1892000
    num_agent_steps_trained: 1892000
    num_steps_sampled: 1892000
    num_steps_trained: 1892000
  iterations_since_restore: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1892,47596.6,1892000,-0.01,0,-1,361.59




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1893000
  custom_metrics: {}
  date: 2021-10-09_11-38-40
  done: false
  episode_len_mean: 361.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5342
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8788372728559706
          entropy_coeff: 0.009999999999999998
          kl: 0.015099460142946164
          policy_loss: -0.06735451799920864
          total_loss: -0.07564597876949443
          vf_explained_var: -0.41390159726142883
          vf_loss: 2.4771229987487053e-05
    num_agent_steps_sampled: 1893000
    num_agent_steps_trained: 1893000
    num_steps_sampled: 1893000
    num_steps_trained: 1893000
  iterations_since_restore: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1893,47638,1893000,-0.01,0,-1,361


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1894000
  custom_metrics: {}
  date: 2021-10-09_11-39-04
  done: false
  episode_len_mean: 361.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5344
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9114869674046835
          entropy_coeff: 0.009999999999999998
          kl: 0.015407300905237332
          policy_loss: -0.04924581319921546
          total_loss: -0.057670420077111983
          vf_explained_var: -0.26586341857910156
          vf_loss: 4.621515467483227e-06
    num_agent_steps_sampled: 1894000
    num_agent_steps_trained: 1894000
    num_steps_sampled: 1894000
    num_steps_trained: 1894000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1894,47661.6,1894000,-0.01,0,-1,361.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1895000
  custom_metrics: {}
  date: 2021-10-09_11-39-26
  done: false
  episode_len_mean: 360.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5347
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8753892660140992
          entropy_coeff: 0.009999999999999998
          kl: 0.0114273941076907
          policy_loss: -0.04879003184744053
          total_loss: -0.05960977029883199
          vf_explained_var: -0.39668944478034973
          vf_loss: 8.753224018518166e-06
    num_agent_steps_sampled: 1895000
    num_agent_steps_trained: 1895000
    num_steps_sampled: 1895000
    num_steps_trained: 1895000
  iterations_since_restore: 1895
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1895,47683.8,1895000,0,0,0,360.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1896000
  custom_metrics: {}
  date: 2021-10-09_11-39-50
  done: false
  episode_len_mean: 362.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5350
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.7513209607866076
          entropy_coeff: 0.009999999999999998
          kl: 0.015513897557892934
          policy_loss: -0.08232624330040482
          total_loss: -0.08906970040665732
          vf_explained_var: -0.5012547969818115
          vf_loss: 1.0177627473240136e-05
    num_agent_steps_sampled: 1896000
    num_agent_steps_trained: 1896000
    num_steps_sampled: 1896000
    num_steps_trained: 1896000
  iterations_since_restore: 1896


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1896,47707.6,1896000,0,0,0,362.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1897000
  custom_metrics: {}
  date: 2021-10-09_11-40-15
  done: false
  episode_len_mean: 359.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5353
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 2.010735817750295
          entropy_coeff: 0.009999999999999998
          kl: 0.01879775820070893
          policy_loss: -0.09784714318811893
          total_loss: -0.10490740160975191
          vf_explained_var: 0.19532328844070435
          vf_loss: 1.0027570758413479e-05
    num_agent_steps_sampled: 1897000
    num_agent_steps_trained: 1897000
    num_steps_sampled: 1897000
    num_steps_trained: 1897000
  iterations_since_restore: 1897
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1897,47732.6,1897000,0,0,0,359.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1898000
  custom_metrics: {}
  date: 2021-10-09_11-40-37
  done: false
  episode_len_mean: 360.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5355
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9436420453919305
          entropy_coeff: 0.009999999999999998
          kl: 0.014509012097438321
          policy_loss: -0.09987800733910666
          total_loss: -0.10924604282610946
          vf_explained_var: -0.38723745942115784
          vf_loss: 5.746424074004608e-06
    num_agent_steps_sampled: 1898000
    num_agent_steps_trained: 1898000
    num_steps_sampled: 1898000
    num_steps_trained: 1898000
  iterations_since_restore: 1898


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1898,47754.8,1898000,0,0,0,360.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1899000
  custom_metrics: {}
  date: 2021-10-09_11-41-00
  done: false
  episode_len_mean: 360.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5358
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9383243653509352
          entropy_coeff: 0.009999999999999998
          kl: 0.014627758939925172
          policy_loss: -0.059851726682649715
          total_loss: -0.0690861344544424
          vf_explained_var: -0.628504753112793
          vf_loss: 3.84209044012904e-06
    num_agent_steps_sampled: 1899000
    num_agent_steps_trained: 1899000
    num_steps_sampled: 1899000
    num_steps_trained: 1899000
  iterations_since_restore: 1899
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1899,47777.8,1899000,0,0,0,360.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1900000
  custom_metrics: {}
  date: 2021-10-09_11-41-23
  done: false
  episode_len_mean: 361.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5361
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9005109522077772
          entropy_coeff: 0.009999999999999998
          kl: 0.014247815761284402
          policy_loss: -0.07528046199844943
          total_loss: -0.0843839558876223
          vf_explained_var: -0.6026569604873657
          vf_loss: 2.0126905491856128e-05
    num_agent_steps_sampled: 1900000
    num_agent_steps_trained: 1900000
    num_steps_sampled: 1900000
    num_steps_trained: 1900000
  iterations_since_restore: 1900
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1900,47800.9,1900000,0,0,0,361.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1901000
  custom_metrics: {}
  date: 2021-10-09_11-41-45
  done: false
  episode_len_mean: 362.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5363
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8857960873179966
          entropy_coeff: 0.009999999999999998
          kl: 0.01493124463797466
          policy_loss: -0.02082389219560557
          total_loss: -0.029266490476826827
          vf_explained_var: -0.45875900983810425
          vf_loss: 5.9889630119869255e-05
    num_agent_steps_sampled: 1901000
    num_agent_steps_trained: 1901000
    num_steps_sampled: 1901000
    num_steps_trained: 1901000
  iterations_since_restore: 1901

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1901,47822.8,1901000,0,0,0,362.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1902000
  custom_metrics: {}
  date: 2021-10-09_11-42-05
  done: false
  episode_len_mean: 363.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5366
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9461447636286417
          entropy_coeff: 0.009999999999999998
          kl: 0.015260462904525715
          policy_loss: -0.052356847292847106
          total_loss: -0.061220222752955224
          vf_explained_var: -0.28527989983558655
          vf_loss: 1.426842759428837e-05
    num_agent_steps_sampled: 1902000
    num_agent_steps_trained: 1902000
    num_steps_sampled: 1902000
    num_steps_trained: 1902000
  iterations_since_restore: 190

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1902,47843.3,1902000,0,0,0,363.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1903000
  custom_metrics: {}
  date: 2021-10-09_11-42-29
  done: false
  episode_len_mean: 365.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5368
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 2.037217871348063
          entropy_coeff: 0.009999999999999998
          kl: 0.00975675188801188
          policy_loss: -0.010738648132731517
          total_loss: -0.02433350771251652
          vf_explained_var: -0.5285755395889282
          vf_loss: 1.0583945537392639e-05
    num_agent_steps_sampled: 1903000
    num_agent_steps_trained: 1903000
    num_steps_sampled: 1903000
    num_steps_trained: 1903000
  iterations_since_restore: 1903
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1903,47866.4,1903000,0,0,0,365.49




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1904000
  custom_metrics: {}
  date: 2021-10-09_11-43-07
  done: false
  episode_len_mean: 366.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5371
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.518384145365821
          entropy_coeff: 0.009999999999999998
          kl: 0.008928867341127567
          policy_loss: -0.04998645111401048
          total_loss: -0.023240020141626398
          vf_explained_var: -0.45375263690948486
          vf_loss: 0.03573770983024992
    num_agent_steps_sampled: 1904000
    num_agent_steps_trained: 1904000
    num_steps_sampled: 1904000
    num_steps_trained: 1904000
  iterations_since_restore: 1904
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1904,47905,1904000,0,0,0,366.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1905000
  custom_metrics: {}
  date: 2021-10-09_11-43-30
  done: false
  episode_len_mean: 367.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5373
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8993225680457222
          entropy_coeff: 0.009999999999999998
          kl: 0.014302364583753837
          policy_loss: -0.11619347189035681
          total_loss: -0.12132486245698398
          vf_explained_var: 0.08444968611001968
          vf_loss: 0.003942515604042758
    num_agent_steps_sampled: 1905000
    num_agent_steps_trained: 1905000
    num_steps_sampled: 1905000
    num_steps_trained: 1905000
  iterations_since_restore: 1905
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1905,47927.7,1905000,0,0,0,367.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1906000
  custom_metrics: {}
  date: 2021-10-09_11-43-53
  done: false
  episode_len_mean: 367.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5376
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9605867385864257
          entropy_coeff: 0.009999999999999998
          kl: 0.014214037166830253
          policy_loss: -0.11389207862731483
          total_loss: -0.12083464573240943
          vf_explained_var: -0.19939853250980377
          vf_loss: 0.0028052375703636143
    num_agent_steps_sampled: 1906000
    num_agent_steps_trained: 1906000
    num_steps_sampled: 1906000
    num_steps_trained: 1906000
  iterations_since_restore: 1906


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1906,47950.8,1906000,0,0,0,367.98


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1907000
  custom_metrics: {}
  date: 2021-10-09_11-44-16
  done: false
  episode_len_mean: 368.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5379
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9334606766700744
          entropy_coeff: 0.009999999999999998
          kl: 0.01580155237837116
          policy_loss: -0.09075359721771545
          total_loss: -0.09715463493226303
          vf_explained_var: -0.5005847811698914
          vf_loss: 0.0019744980401204277
    num_agent_steps_sampled: 1907000
    num_agent_steps_trained: 1907000
    num_steps_sampled: 1907000
    num_steps_trained: 1907000
  iterations_since_restore: 1907
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1907,47974.2,1907000,0,0,0,368.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1908000
  custom_metrics: {}
  date: 2021-10-09_11-44-40
  done: false
  episode_len_mean: 368.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5381
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9101488908131918
          entropy_coeff: 0.009999999999999998
          kl: 0.016373005208548678
          policy_loss: -0.065766999249657
          total_loss: -0.07233435101807117
          vf_explained_var: -0.5414477586746216
          vf_loss: 0.001178737261539532
    num_agent_steps_sampled: 1908000
    num_agent_steps_trained: 1908000
    num_steps_sampled: 1908000
    num_steps_trained: 1908000
  iterations_since_restore: 1908
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1908,47997.5,1908000,0,0,0,368.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1909000
  custom_metrics: {}
  date: 2021-10-09_11-45-02
  done: false
  episode_len_mean: 369.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5384
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 2.0159161978297764
          entropy_coeff: 0.009999999999999998
          kl: 0.017368782284536716
          policy_loss: -0.11003757123317984
          total_loss: -0.11704383469704124
          vf_explained_var: -0.5823313593864441
          vf_loss: 0.0011068838603225434
    num_agent_steps_sampled: 1909000
    num_agent_steps_trained: 1909000
    num_steps_sampled: 1909000
    num_steps_trained: 1909000
  iterations_since_restore: 1909
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1909,48019.6,1909000,0,0,0,369.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1910000
  custom_metrics: {}
  date: 2021-10-09_11-45-23
  done: false
  episode_len_mean: 370.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5386
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9646578272183737
          entropy_coeff: 0.009999999999999998
          kl: 0.014212958314478956
          policy_loss: -0.08652354056636492
          total_loss: -0.09548331143127547
          vf_explained_var: -1.0
          vf_loss: 0.0008294958588925915
    num_agent_steps_sampled: 1910000
    num_agent_steps_trained: 1910000
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
  iterations_since_restore: 1910
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1910,48040.8,1910000,0,0,0,370.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1911000
  custom_metrics: {}
  date: 2021-10-09_11-45-48
  done: false
  episode_len_mean: 370.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5389
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.802977724870046
          entropy_coeff: 0.009999999999999998
          kl: 0.013375117089484018
          policy_loss: -0.12943049562681053
          total_loss: -0.1374411988279058
          vf_explained_var: -0.7993392944335938
          vf_loss: 0.000742842329135682
    num_agent_steps_sampled: 1911000
    num_agent_steps_trained: 1911000
    num_steps_sampled: 1911000
    num_steps_trained: 1911000
  iterations_since_restore: 1911
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1911,48065.4,1911000,0,0,0,370.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1912000
  custom_metrics: {}
  date: 2021-10-09_11-46-10
  done: false
  episode_len_mean: 370.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5392
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 2.0135435024897257
          entropy_coeff: 0.009999999999999998
          kl: 0.014412442089747736
          policy_loss: -0.05180508204632335
          total_loss: -0.06114961802959442
          vf_explained_var: -0.506903350353241
          vf_loss: 0.0007952364714583382
    num_agent_steps_sampled: 1912000
    num_agent_steps_trained: 1912000
    num_steps_sampled: 1912000
    num_steps_trained: 1912000
  iterations_since_restore: 1912
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1912,48087.4,1912000,0,0,0,370.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1913000
  custom_metrics: {}
  date: 2021-10-09_11-46-31
  done: false
  episode_len_mean: 371.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5394
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9369861894183689
          entropy_coeff: 0.009999999999999998
          kl: 0.014350513880828232
          policy_loss: -0.12928540193372304
          total_loss: -0.13802687793763146
          vf_explained_var: -0.793556809425354
          vf_loss: 0.0006756700277845893
    num_agent_steps_sampled: 1913000
    num_agent_steps_trained: 1913000
    num_steps_sampled: 1913000
    num_steps_trained: 1913000
  iterations_since_restore: 1913
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1913,48108.9,1913000,0,0,0,371.38


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1914000
  custom_metrics: {}
  date: 2021-10-09_11-46-52
  done: false
  episode_len_mean: 373.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5397
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9167103926340738
          entropy_coeff: 0.009999999999999998
          kl: 0.01419900651769797
          policy_loss: -0.10458480036920971
          total_loss: -0.11348765198555258
          vf_explained_var: -0.7030623555183411
          vf_loss: 0.00041661859399432107
    num_agent_steps_sampled: 1914000
    num_agent_steps_trained: 1914000
    num_steps_sampled: 1914000
    num_steps_trained: 1914000
  iterations_since_restore: 1914
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1914,48130,1914000,0,0,0,373.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1915000
  custom_metrics: {}
  date: 2021-10-09_11-47-17
  done: false
  episode_len_mean: 372.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5399
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.968334194024404
          entropy_coeff: 0.009999999999999998
          kl: 0.013670705092464047
          policy_loss: -0.06660332526597712
          total_loss: -0.07640809168418249
          vf_explained_var: -0.998530924320221
          vf_loss: 0.0003973416182109051
    num_agent_steps_sampled: 1915000
    num_agent_steps_trained: 1915000
    num_steps_sampled: 1915000
    num_steps_trained: 1915000
  iterations_since_restore: 1915
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1915,48154.3,1915000,0,0,0,372.46




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1916000
  custom_metrics: {}
  date: 2021-10-09_11-47-57
  done: false
  episode_len_mean: 372.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5402
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.936153855588701
          entropy_coeff: 0.009999999999999998
          kl: 0.015448779773744252
          policy_loss: -0.10649223395933707
          total_loss: -0.11475530819346508
          vf_explained_var: -0.41414448618888855
          vf_loss: 0.0003840521783179914
    num_agent_steps_sampled: 1916000
    num_agent_steps_trained: 1916000
    num_steps_sampled: 1916000
    num_steps_trained: 1916000
  iterations_since_restore: 1916
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1916,48194.4,1916000,0,0,0,372.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1917000
  custom_metrics: {}
  date: 2021-10-09_11-48-20
  done: false
  episode_len_mean: 373.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5405
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9617706855138144
          entropy_coeff: 0.009999999999999998
          kl: 0.018071785590481643
          policy_loss: -0.15071223167081674
          total_loss: -0.1574729521241453
          vf_explained_var: -0.48054182529449463
          vf_loss: 0.00032340795733034613
    num_agent_steps_sampled: 1917000
    num_agent_steps_trained: 1917000
    num_steps_sampled: 1917000
    num_steps_trained: 1917000
  iterations_since_restore: 1917


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1917,48218,1917000,0,0,0,373.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1918000
  custom_metrics: {}
  date: 2021-10-09_11-48-44
  done: false
  episode_len_mean: 373.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5408
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.838150315814548
          entropy_coeff: 0.009999999999999998
          kl: 0.015521209717829424
          policy_loss: -0.10440919912523694
          total_loss: -0.1116454438202911
          vf_explained_var: -0.7281973958015442
          vf_loss: 0.00038061649231369504
    num_agent_steps_sampled: 1918000
    num_agent_steps_trained: 1918000
    num_steps_sampled: 1918000
    num_steps_trained: 1918000
  iterations_since_restore: 1918
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1918,48241.6,1918000,0,0,0,373.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1919000
  custom_metrics: {}
  date: 2021-10-09_11-49-08
  done: false
  episode_len_mean: 373.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5411
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.847327348921034
          entropy_coeff: 0.009999999999999998
          kl: 0.013370090701987596
          policy_loss: -0.07089614715013239
          total_loss: -0.07993246677021186
          vf_explained_var: -0.5365679264068604
          vf_loss: 0.00016420678528245643
    num_agent_steps_sampled: 1919000
    num_agent_steps_trained: 1919000
    num_steps_sampled: 1919000
    num_steps_trained: 1919000
  iterations_since_restore: 1919
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1919,48265,1919000,0,0,0,373.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1920000
  custom_metrics: {}
  date: 2021-10-09_11-49-31
  done: false
  episode_len_mean: 372.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5413
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9071304387516446
          entropy_coeff: 0.009999999999999998
          kl: 0.013883152462253653
          policy_loss: -0.07740770058913363
          total_loss: -0.08670570597880417
          vf_explained_var: -0.7369715571403503
          vf_loss: 0.00014472074706443688
    num_agent_steps_sampled: 1920000
    num_agent_steps_trained: 1920000
    num_steps_sampled: 1920000
    num_steps_trained: 1920000
  iterations_since_restore: 1920


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1920,48288.9,1920000,0,0,0,372.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1921000
  custom_metrics: {}
  date: 2021-10-09_11-49-54
  done: false
  episode_len_mean: 371.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5416
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8584823515680102
          entropy_coeff: 0.009999999999999998
          kl: 0.01341183555901178
          policy_loss: -0.12753885280754831
          total_loss: -0.1366803534535898
          vf_explained_var: -0.7468479871749878
          vf_loss: 0.00014162179545059594
    num_agent_steps_sampled: 1921000
    num_agent_steps_trained: 1921000
    num_steps_sampled: 1921000
    num_steps_trained: 1921000
  iterations_since_restore: 1921
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1921,48311.9,1921000,0,0,0,371.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1922000
  custom_metrics: {}
  date: 2021-10-09_11-50-17
  done: false
  episode_len_mean: 372.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5419
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.9856893526183235
          entropy_coeff: 0.009999999999999998
          kl: 0.015673868147962174
          policy_loss: -0.09214439559727908
          total_loss: -0.10102820953147279
          vf_explained_var: -0.9851739406585693
          vf_loss: 0.00010256084759829618
    num_agent_steps_sampled: 1922000
    num_agent_steps_trained: 1922000
    num_steps_sampled: 1922000
    num_steps_trained: 1922000
  iterations_since_restore: 1922


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1922,48334.7,1922000,0,0,0,372.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1923000
  custom_metrics: {}
  date: 2021-10-09_11-50-42
  done: false
  episode_len_mean: 370.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5422
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.8497815688451131
          entropy_coeff: 0.009999999999999998
          kl: 0.012242139620792564
          policy_loss: -0.06368187684565782
          total_loss: -0.07361013810667727
          vf_explained_var: -0.6667149066925049
          vf_loss: 7.909180579493599e-05
    num_agent_steps_sampled: 1923000
    num_agent_steps_trained: 1923000
    num_steps_sampled: 1923000
    num_steps_trained: 1923000
  iterations_since_restore: 1923
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1923,48359.3,1923000,0,0,0,370.78


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1924000
  custom_metrics: {}
  date: 2021-10-09_11-51-06
  done: false
  episode_len_mean: 369.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5424
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.7848605102962918
          entropy_coeff: 0.009999999999999998
          kl: 0.0024377640318899775
          policy_loss: -0.15145107491148843
          total_loss: -0.16753355678584841
          vf_explained_var: -0.12909767031669617
          vf_loss: 7.542642957155799e-05
    num_agent_steps_sampled: 1924000
    num_agent_steps_trained: 1924000
    num_steps_sampled: 1924000
    num_steps_trained: 1924000
  iterations_since_restore: 1924


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1924,48383.1,1924000,0,0,0,369


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1925000
  custom_metrics: {}
  date: 2021-10-09_11-51-31
  done: false
  episode_len_mean: 370.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5427
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 1.460485526588228
          entropy_coeff: 0.009999999999999998
          kl: 0.023347690866420065
          policy_loss: -0.0987198590197497
          total_loss: -0.10509619381692674
          vf_explained_var: 0.3267097771167755
          vf_loss: 0.00013219233090543033
    num_agent_steps_sampled: 1925000
    num_agent_steps_trained: 1925000
    num_steps_sampled: 1925000
    num_steps_trained: 1925000
  iterations_since_restore: 1925

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1925,48407.9,1925000,-0.04,0,-4,370.17




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1926000
  custom_metrics: {}
  date: 2021-10-09_11-52-15
  done: false
  episode_len_mean: 368.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 4
  episodes_total: 5431
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 1.9214986191855536
          entropy_coeff: 0.009999999999999998
          kl: 0.019044420048181247
          policy_loss: -0.036126839286751214
          total_loss: -0.04534202342232068
          vf_explained_var: -0.572702169418335
          vf_loss: 9.369404304177604e-05
    num_agent_steps_sampled: 1926000
    num_agent_steps_trained: 1926000
    num_steps_sampled: 1926000
    num_steps_trained: 1926000
  iterations_since_restore: 1926

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1926,48452,1926000,-0.04,0,-4,368.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1927000
  custom_metrics: {}
  date: 2021-10-09_11-52-35
  done: false
  episode_len_mean: 369.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5433
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 1.974540768729316
          entropy_coeff: 0.009999999999999998
          kl: 0.01717895124391943
          policy_loss: -0.1385190269185437
          total_loss: -0.14924972711337936
          vf_explained_var: -0.6452751159667969
          vf_loss: 7.893714310840652e-05
    num_agent_steps_sampled: 1927000
    num_agent_steps_trained: 1927000
    num_steps_sampled: 1927000
    num_steps_trained: 1927000
  iterations_since_restore: 1927
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1927,48472.8,1927000,-0.04,0,-4,369.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1928000
  custom_metrics: {}
  date: 2021-10-09_11-52-56
  done: false
  episode_len_mean: 371.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5435
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 1.8646179729037815
          entropy_coeff: 0.009999999999999998
          kl: 0.015498439221475892
          policy_loss: -0.05142050948407915
          total_loss: -0.06192977569169468
          vf_explained_var: -0.8018876910209656
          vf_loss: 7.52740508889676e-05
    num_agent_steps_sampled: 1928000
    num_agent_steps_trained: 1928000
    num_steps_sampled: 1928000
    num_steps_trained: 1928000
  iterations_since_restore: 1928


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1928,48492.9,1928000,-0.04,0,-4,371.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1929000
  custom_metrics: {}
  date: 2021-10-09_11-53-20
  done: false
  episode_len_mean: 371.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5438
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 1.7982608159383138
          entropy_coeff: 0.009999999999999998
          kl: 0.02176816443090317
          policy_loss: -0.1085665992150704
          total_loss: -0.11519648441010051
          vf_explained_var: -0.2552322745323181
          vf_loss: 2.98364479426204e-05
    num_agent_steps_sampled: 1929000
    num_agent_steps_trained: 1929000
    num_steps_sampled: 1929000
    num_steps_trained: 1929000
  iterations_since_restore: 1929
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1929,48517.2,1929000,-0.04,0,-4,371.31


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1930000
  custom_metrics: {}
  date: 2021-10-09_11-53-43
  done: false
  episode_len_mean: 372.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5441
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.9191334380043878
          entropy_coeff: 0.009999999999999998
          kl: 0.014441705456789815
          policy_loss: -0.10819393951031897
          total_loss: -0.11606161525059078
          vf_explained_var: -0.6440639495849609
          vf_loss: 5.570473707242248e-05
    num_agent_steps_sampled: 1930000
    num_agent_steps_trained: 1930000
    num_steps_sampled: 1930000
    num_steps_trained: 1930000
  iterations_since_restore: 1930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1930,48540,1930000,-0.04,0,-4,372.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1931000
  custom_metrics: {}
  date: 2021-10-09_11-54-02
  done: false
  episode_len_mean: 373.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5443
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.8702641235457527
          entropy_coeff: 0.009999999999999998
          kl: 0.01128716546985393
          policy_loss: -0.09173349797104796
          total_loss: -0.10157876979145739
          vf_explained_var: -0.5967732667922974
          vf_loss: 5.070391984595012e-05
    num_agent_steps_sampled: 1931000
    num_agent_steps_trained: 1931000
    num_steps_sampled: 1931000
    num_steps_trained: 1931000
  iterations_since_restore: 1931

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1931,48559.7,1931000,-0.04,0,-4,373.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1932000
  custom_metrics: {}
  date: 2021-10-09_11-54-24
  done: false
  episode_len_mean: 375.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5446
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.6578357789251539
          entropy_coeff: 0.009999999999999998
          kl: 0.01458637330409519
          policy_loss: -0.11702233192821344
          total_loss: -0.12215747713214821
          vf_explained_var: -0.6584978103637695
          vf_loss: 6.238117049280036e-05
    num_agent_steps_sampled: 1932000
    num_agent_steps_trained: 1932000
    num_steps_sampled: 1932000
    num_steps_trained: 1932000
  iterations_since_restore: 1932

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1932,48581.6,1932000,-0.04,0,-4,375.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1933000
  custom_metrics: {}
  date: 2021-10-09_11-54-48
  done: false
  episode_len_mean: 374.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5449
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.7859946396615771
          entropy_coeff: 0.009999999999999998
          kl: 0.013464261363839825
          policy_loss: -0.1189461717589034
          total_loss: -0.12626109686162737
          vf_explained_var: -0.5985758900642395
          vf_loss: 3.970605014021405e-05
    num_agent_steps_sampled: 1933000
    num_agent_steps_trained: 1933000
    num_steps_sampled: 1933000
    num_steps_trained: 1933000
  iterations_since_restore: 1933

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1933,48605.1,1933000,-0.04,0,-4,374.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1934000
  custom_metrics: {}
  date: 2021-10-09_11-55-12
  done: false
  episode_len_mean: 373.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5451
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.9448469916979472
          entropy_coeff: 0.009999999999999998
          kl: 0.010181571133763058
          policy_loss: -0.15819947599536843
          total_loss: -0.16968100650442972
          vf_explained_var: -0.44826167821884155
          vf_loss: 2.2899715152865003e-05
    num_agent_steps_sampled: 1934000
    num_agent_steps_trained: 1934000
    num_steps_sampled: 1934000
    num_steps_trained: 1934000
  iterations_since_restore: 1934

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1934,48629.6,1934000,-0.04,0,-4,373.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1935000
  custom_metrics: {}
  date: 2021-10-09_11-55-35
  done: false
  episode_len_mean: 374.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5454
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.771481408013238
          entropy_coeff: 0.009999999999999998
          kl: 0.012453714116172894
          policy_loss: -0.036205021623108125
          total_loss: 0.044243294911252125
          vf_explained_var: -0.47305577993392944
          vf_loss: 0.08844628107084039
    num_agent_steps_sampled: 1935000
    num_agent_steps_trained: 1935000
    num_steps_sampled: 1935000
    num_steps_trained: 1935000
  iterations_since_restore: 1935

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1935,48652.5,1935000,-0.07,0,-4,374.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1936000
  custom_metrics: {}
  date: 2021-10-09_11-55-59
  done: false
  episode_len_mean: 374.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5457
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.9348152293099297
          entropy_coeff: 0.009999999999999998
          kl: 0.017144209514561155
          policy_loss: -0.1525955974848734
          total_loss: -0.15163514752768809
          vf_explained_var: -0.21773329377174377
          vf_loss: 0.006932052662078705
    num_agent_steps_sampled: 1936000
    num_agent_steps_trained: 1936000
    num_steps_sampled: 1936000
    num_steps_trained: 1936000
  iterations_since_restore: 1936

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1936,48675.8,1936000,-0.07,0,-4,374.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1937000
  custom_metrics: {}
  date: 2021-10-09_11-56-20
  done: false
  episode_len_mean: 374.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5459
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 2.0207107292281257
          entropy_coeff: 0.009999999999999998
          kl: 0.014741564844461683
          policy_loss: -0.08897740176568429
          total_loss: -0.09393164364414083
          vf_explained_var: -0.4393731355667114
          vf_loss: 0.003750950108385748
    num_agent_steps_sampled: 1937000
    num_agent_steps_trained: 1937000
    num_steps_sampled: 1937000
    num_steps_trained: 1937000
  iterations_since_restore: 1937

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1937,48697.3,1937000,-0.07,0,-4,374.51




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1938000
  custom_metrics: {}
  date: 2021-10-09_11-56-59
  done: false
  episode_len_mean: 374.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5462
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.8129457394282023
          entropy_coeff: 0.009999999999999998
          kl: 0.018681529430930422
          policy_loss: -0.05657536827234758
          total_loss: -0.055750842971934216
          vf_explained_var: -0.1981654316186905
          vf_loss: 0.0043779635284509925
    num_agent_steps_sampled: 1938000
    num_agent_steps_trained: 1938000
    num_steps_sampled: 1938000
    num_steps_trained: 1938000
  iterations_since_restore: 1938

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1938,48736.4,1938000,-0.07,0,-4,374.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1939000
  custom_metrics: {}
  date: 2021-10-09_11-57-23
  done: false
  episode_len_mean: 373.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5465
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.676280383268992
          entropy_coeff: 0.009999999999999998
          kl: 0.014210346101220721
          policy_loss: -0.08703949031316571
          total_loss: -0.09028036083198256
          vf_explained_var: -0.6662111282348633
          vf_loss: 0.0024344921021515298
    num_agent_steps_sampled: 1939000
    num_agent_steps_trained: 1939000
    num_steps_sampled: 1939000
    num_steps_trained: 1939000
  iterations_since_restore: 1939

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1939,48760.3,1939000,-0.07,0,-4,373.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1940000
  custom_metrics: {}
  date: 2021-10-09_11-57-46
  done: false
  episode_len_mean: 372.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5467
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.9272915720939636
          entropy_coeff: 0.009999999999999998
          kl: 0.016692824721201975
          policy_loss: -0.10283654085877869
          total_loss: -0.10713837107436525
          vf_explained_var: -0.7316718101501465
          vf_loss: 0.001946723433987548
    num_agent_steps_sampled: 1940000
    num_agent_steps_trained: 1940000
    num_steps_sampled: 1940000
    num_steps_trained: 1940000
  iterations_since_restore: 1940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1940,48783.4,1940000,-0.07,0,-4,372.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1941000
  custom_metrics: {}
  date: 2021-10-09_11-58-10
  done: false
  episode_len_mean: 372.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5470
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.8915889302889506
          entropy_coeff: 0.009999999999999998
          kl: 0.015010298612687024
          policy_loss: -0.0579140093177557
          total_loss: -0.06262846533209085
          vf_explained_var: -0.4344439208507538
          vf_loss: 0.0024898407489268317
    num_agent_steps_sampled: 1941000
    num_agent_steps_trained: 1941000
    num_steps_sampled: 1941000
    num_steps_trained: 1941000
  iterations_since_restore: 1941

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1941,48807.1,1941000,-0.07,0,-4,372.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1942000
  custom_metrics: {}
  date: 2021-10-09_11-58-34
  done: false
  episode_len_mean: 371.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5473
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.779486244254642
          entropy_coeff: 0.009999999999999998
          kl: 0.012549868048240459
          policy_loss: -0.0961076672292418
          total_loss: -0.10311696268618106
          vf_explained_var: -0.5582568049430847
          vf_loss: 0.0009936952323187143
    num_agent_steps_sampled: 1942000
    num_agent_steps_trained: 1942000
    num_steps_sampled: 1942000
    num_steps_trained: 1942000
  iterations_since_restore: 1942


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1942,48830.7,1942000,-0.07,0,-4,371.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1943000
  custom_metrics: {}
  date: 2021-10-09_11-58-59
  done: false
  episode_len_mean: 370.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5476
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 1.6407833576202393
          entropy_coeff: 0.009999999999999998
          kl: 0.02006257574163328
          policy_loss: -0.13759752561648686
          total_loss: -0.1372600788043605
          vf_explained_var: -0.07019060105085373
          vf_loss: 0.0010917168983723968
    num_agent_steps_sampled: 1943000
    num_agent_steps_trained: 1943000
    num_steps_sampled: 1943000
    num_steps_trained: 1943000
  iterations_since_restore: 1943


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1943,48855.9,1943000,-0.07,0,-4,370.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1944000
  custom_metrics: {}
  date: 2021-10-09_11-59-23
  done: false
  episode_len_mean: 370.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5479
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.9419179399808248
          entropy_coeff: 0.009999999999999998
          kl: 0.012273608732035039
          policy_loss: -0.10713289814690749
          total_loss: -0.1115521995143758
          vf_explained_var: -0.4893251061439514
          vf_loss: 0.0006353900286032714
    num_agent_steps_sampled: 1944000
    num_agent_steps_trained: 1944000
    num_steps_sampled: 1944000
    num_steps_trained: 1944000
  iterations_since_restore: 1944


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1944,48879.7,1944000,-0.07,0,-4,370.87


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1945000
  custom_metrics: {}
  date: 2021-10-09_11-59-47
  done: false
  episode_len_mean: 370.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5481
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.4967008537716335
          entropy_coeff: 0.009999999999999998
          kl: 0.008598421350972213
          policy_loss: -0.1208534555716647
          total_loss: -0.1252166472789314
          vf_explained_var: -0.9382247924804688
          vf_loss: 0.0005406085993551339
    num_agent_steps_sampled: 1945000
    num_agent_steps_trained: 1945000
    num_steps_sampled: 1945000
    num_steps_trained: 1945000
  iterations_since_restore: 1945
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1945,48903.6,1945000,-0.07,0,-4,370.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1946000
  custom_metrics: {}
  date: 2021-10-09_12-00-08
  done: false
  episode_len_mean: 370.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5484
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.9429355488883124
          entropy_coeff: 0.009999999999999998
          kl: 0.010239959979741907
          policy_loss: -0.01804818877329429
          total_loss: -0.024942621712883313
          vf_explained_var: -0.5955865383148193
          vf_loss: 0.0005505295557668433
    num_agent_steps_sampled: 1946000
    num_agent_steps_trained: 1946000
    num_steps_sampled: 1946000
    num_steps_trained: 1946000
  iterations_since_restore: 1946

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1946,48924.9,1946000,-0.07,0,-4,370.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1947000
  custom_metrics: {}
  date: 2021-10-09_12-00-30
  done: false
  episode_len_mean: 370.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5486
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 2.029023730754852
          entropy_coeff: 0.009999999999999998
          kl: 0.01136237086832467
          policy_loss: -0.0939447837571303
          total_loss: -0.10045482069253922
          vf_explained_var: -0.994999885559082
          vf_loss: 0.000482184486463666
    num_agent_steps_sampled: 1947000
    num_agent_steps_trained: 1947000
    num_steps_sampled: 1947000
    num_steps_trained: 1947000
  iterations_since_restore: 1947
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1947,48947.4,1947000,-0.07,0,-4,370.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1948000
  custom_metrics: {}
  date: 2021-10-09_12-00-55
  done: false
  episode_len_mean: 370.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5489
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.816226671801673
          entropy_coeff: 0.009999999999999998
          kl: 0.01397253766305562
          policy_loss: -0.1109656133585506
          total_loss: -0.11224482307831446
          vf_explained_var: -0.5909890532493591
          vf_loss: 0.0005302203883830872
    num_agent_steps_sampled: 1948000
    num_agent_steps_trained: 1948000
    num_steps_sampled: 1948000
    num_steps_trained: 1948000
  iterations_since_restore: 1948
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1948,48971.7,1948000,-0.07,0,-4,370.91




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1949000
  custom_metrics: {}
  date: 2021-10-09_12-01-36
  done: false
  episode_len_mean: 368.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5492
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.8849738942252265
          entropy_coeff: 0.009999999999999998
          kl: 0.011994767796555046
          policy_loss: -0.14412620448403887
          total_loss: -0.14857212718990115
          vf_explained_var: -0.8646891117095947
          vf_loss: 0.0003656733929852231
    num_agent_steps_sampled: 1949000
    num_agent_steps_trained: 1949000
    num_steps_sampled: 1949000
    num_steps_trained: 1949000
  iterations_since_restore: 1949

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1949,49012.5,1949000,-0.07,0,-4,368.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1950000
  custom_metrics: {}
  date: 2021-10-09_12-01-57
  done: false
  episode_len_mean: 368.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5494
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.892332793606652
          entropy_coeff: 0.009999999999999998
          kl: 0.0121033169896128
          policy_loss: -0.07851611326138179
          total_loss: -0.08299400181406075
          vf_explained_var: -0.8108152747154236
          vf_loss: 0.00028025786709299106
    num_agent_steps_sampled: 1950000
    num_agent_steps_trained: 1950000
    num_steps_sampled: 1950000
    num_steps_trained: 1950000
  iterations_since_restore: 1950
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1950,49033.4,1950000,-0.07,0,-4,368.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1951000
  custom_metrics: {}
  date: 2021-10-09_12-02-17
  done: false
  episode_len_mean: 368.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5497
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.7763790210088095
          entropy_coeff: 0.009999999999999998
          kl: 0.011284475611645034
          policy_loss: -0.11072305502990881
          total_loss: -0.11497831783360905
          vf_explained_var: -0.7979243993759155
          vf_loss: 0.00030167699717114575
    num_agent_steps_sampled: 1951000
    num_agent_steps_trained: 1951000
    num_steps_sampled: 1951000
    num_steps_trained: 1951000
  iterations_since_restore: 1951

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1951,49054.2,1951000,-0.07,0,-4,368.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1952000
  custom_metrics: {}
  date: 2021-10-09_12-02-42
  done: false
  episode_len_mean: 368.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5500
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.6759698179033067
          entropy_coeff: 0.009999999999999998
          kl: 0.01231763502526684
          policy_loss: -0.1440185770806339
          total_loss: -0.14611047274536557
          vf_explained_var: -0.5787044763565063
          vf_loss: 0.0002517916129565694
    num_agent_steps_sampled: 1952000
    num_agent_steps_trained: 1952000
    num_steps_sampled: 1952000
    num_steps_trained: 1952000
  iterations_since_restore: 1952
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1952,49079.3,1952000,-0.07,0,-4,368.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1953000
  custom_metrics: {}
  date: 2021-10-09_12-03-06
  done: false
  episode_len_mean: 368.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5503
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.858473531405131
          entropy_coeff: 0.009999999999999998
          kl: 0.011184009172189011
          policy_loss: -0.12989174077908197
          total_loss: -0.13512682774000698
          vf_explained_var: -0.3039233088493347
          vf_loss: 0.00026037967327283693
    num_agent_steps_sampled: 1953000
    num_agent_steps_trained: 1953000
    num_steps_sampled: 1953000
    num_steps_trained: 1953000
  iterations_since_restore: 1953

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1953,49102.7,1953000,-0.07,0,-4,368.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1954000
  custom_metrics: {}
  date: 2021-10-09_12-03-28
  done: false
  episode_len_mean: 369.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5505
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.8959277947743733
          entropy_coeff: 0.009999999999999998
          kl: 0.012258842647512798
          policy_loss: -0.10866675556947787
          total_loss: -0.1130697741276688
          vf_explained_var: -0.9859586358070374
          vf_loss: 0.00020905124102783804
    num_agent_steps_sampled: 1954000
    num_agent_steps_trained: 1954000
    num_steps_sampled: 1954000
    num_steps_trained: 1954000
  iterations_since_restore: 1954

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1954,49124.3,1954000,-0.07,0,-4,369.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1955000
  custom_metrics: {}
  date: 2021-10-09_12-03-47
  done: false
  episode_len_mean: 370.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5507
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.170355586138209
          cur_lr: 5.000000000000001e-05
          entropy: 1.5092904845873514
          entropy_coeff: 0.009999999999999998
          kl: 0.0016520804458379679
          policy_loss: -0.1388882881237401
          total_loss: -0.15189510616991256
          vf_explained_var: -0.8574508428573608
          vf_loss: 0.00015256440068090645
    num_agent_steps_sampled: 1955000
    num_agent_steps_trained: 1955000
    num_steps_sampled: 1955000
    num_steps_trained: 1955000
  iterations_since_restore: 195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1955,49144,1955000,-0.07,0,-4,370.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1956000
  custom_metrics: {}
  date: 2021-10-09_12-04-07
  done: false
  episode_len_mean: 373.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 5510
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5851777930691044
          cur_lr: 5.000000000000001e-05
          entropy: 1.4900010625521343
          entropy_coeff: 0.009999999999999998
          kl: 0.011399185223127509
          policy_loss: -0.023249187372210954
          total_loss: -0.030891700978908275
          vf_explained_var: -0.818719744682312
          vf_loss: 0.0005869456803743055
    num_agent_steps_sampled: 1956000
    num_agent_steps_trained: 1956000
    num_steps_sampled: 1956000
    num_steps_trained: 1956000
  iterations_since_restore: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1956,49163.3,1956000,-0.08,0,-4,373.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1957000
  custom_metrics: {}
  date: 2021-10-09_12-04-31
  done: false
  episode_len_mean: 373.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 5512
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5851777930691044
          cur_lr: 5.000000000000001e-05
          entropy: 1.8390393442577786
          entropy_coeff: 0.009999999999999998
          kl: 0.01407777009732377
          policy_loss: -0.11412446503010061
          total_loss: -0.12411882655902041
          vf_explained_var: -0.8751005530357361
          vf_loss: 0.00015803353574786645
    num_agent_steps_sampled: 1957000
    num_agent_steps_trained: 1957000
    num_steps_sampled: 1957000
    num_steps_trained: 1957000
  iterations_since_restore: 195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1957,49187.6,1957000,-0.08,0,-4,373.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1958000
  custom_metrics: {}
  date: 2021-10-09_12-04-52
  done: false
  episode_len_mean: 373.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5515
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5851777930691044
          cur_lr: 5.000000000000001e-05
          entropy: 1.6451684753100078
          entropy_coeff: 0.009999999999999998
          kl: 0.019002588065525492
          policy_loss: -0.013714930084016587
          total_loss: 0.14261363479826186
          vf_explained_var: -0.023502768948674202
          vf_loss: 0.16166035838670925
    num_agent_steps_sampled: 1958000
    num_agent_steps_trained: 1958000
    num_steps_sampled: 1958000
    num_steps_trained: 1958000
  iterations_since_restore: 195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1958,49208.6,1958000,-0.16,0,-8,373.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1959000
  custom_metrics: {}
  date: 2021-10-09_12-05-14
  done: false
  episode_len_mean: 373.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 5517
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5851777930691044
          cur_lr: 5.000000000000001e-05
          entropy: 1.9230814668867322
          entropy_coeff: 0.009999999999999998
          kl: 0.020972458236783044
          policy_loss: -0.09173763034244378
          total_loss: -0.08797138548559613
          vf_explained_var: -0.07600544393062592
          vf_loss: 0.010724443859524197
    num_agent_steps_sampled: 1959000
    num_agent_steps_trained: 1959000
    num_steps_sampled: 1959000
    num_steps_trained: 1959000
  iterations_since_restore: 195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1959,49230.9,1959000,-0.16,0,-8,373.79




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1960000
  custom_metrics: {}
  date: 2021-10-09_12-05-53
  done: false
  episode_len_mean: 374.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5520
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.8663825074831644
          entropy_coeff: 0.009999999999999998
          kl: 0.01808587380046358
          policy_loss: -0.086105677485466
          total_loss: -0.08149394061830309
          vf_explained_var: -0.06971295922994614
          vf_loss: 0.007400385781915652
    num_agent_steps_sampled: 1960000
    num_agent_steps_trained: 1960000
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
  iterations_since_restore: 1960
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1960,49270.1,1960000,-0.16,0,-8,374.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1961000
  custom_metrics: {}
  date: 2021-10-09_12-06-17
  done: false
  episode_len_mean: 376.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5523
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.6832452045546638
          entropy_coeff: 0.009999999999999998
          kl: 0.012507649189284504
          policy_loss: -0.10459925747579998
          total_loss: -0.10741334325737423
          vf_explained_var: -0.46684613823890686
          vf_loss: 0.003039567526947293
    num_agent_steps_sampled: 1961000
    num_agent_steps_trained: 1961000
    num_steps_sampled: 1961000
    num_steps_trained: 1961000
  iterations_since_restore: 196

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1961,49294.1,1961000,-0.16,0,-8,376.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1962000
  custom_metrics: {}
  date: 2021-10-09_12-06-43
  done: false
  episode_len_mean: 375.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5526
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.7358807590272691
          entropy_coeff: 0.009999999999999998
          kl: 0.016109779351619856
          policy_loss: -0.10918529255108701
          total_loss: -0.11001710411575105
          vf_explained_var: -0.663603663444519
          vf_loss: 0.0023863673158403898
    num_agent_steps_sampled: 1962000
    num_agent_steps_trained: 1962000
    num_steps_sampled: 1962000
    num_steps_trained: 1962000
  iterations_since_restore: 1962

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1962,49319.5,1962000,-0.12,0,-8,375.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1963000
  custom_metrics: {}
  date: 2021-10-09_12-07-07
  done: false
  episode_len_mean: 374.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5529
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.7689669794506497
          entropy_coeff: 0.009999999999999998
          kl: 0.014160487506361522
          policy_loss: -0.03881240640249517
          total_loss: -0.042614374972052046
          vf_explained_var: -0.48723724484443665
          vf_loss: 0.0014580991791768205
    num_agent_steps_sampled: 1963000
    num_agent_steps_trained: 1963000
    num_steps_sampled: 1963000
    num_steps_trained: 1963000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1963,49343.1,1963000,-0.12,0,-8,374.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1964000
  custom_metrics: {}
  date: 2021-10-09_12-07-29
  done: false
  episode_len_mean: 376.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 5531
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.8782804701063367
          entropy_coeff: 0.009999999999999998
          kl: 0.013809075964015705
          policy_loss: -0.04885954596102238
          total_loss: -0.05318801627597875
          vf_explained_var: -0.9622889757156372
          vf_loss: 0.0023331865241440633
    num_agent_steps_sampled: 1964000
    num_agent_steps_trained: 1964000
    num_steps_sampled: 1964000
    num_steps_trained: 1964000
  iterations_since_restore: 196

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1964,49365.7,1964000,-0.12,0,-8,376.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1965000
  custom_metrics: {}
  date: 2021-10-09_12-07-54
  done: false
  episode_len_mean: 374.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5534
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.784146409564548
          entropy_coeff: 0.009999999999999998
          kl: 0.016996634363156696
          policy_loss: -0.13676838018000126
          total_loss: -0.13836837886936135
          vf_explained_var: -0.5514724254608154
          vf_loss: 0.0013223825442966902
    num_agent_steps_sampled: 1965000
    num_agent_steps_trained: 1965000
    num_steps_sampled: 1965000
    num_steps_trained: 1965000
  iterations_since_restore: 1965

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1965,49390.3,1965000,-0.12,0,-8,374.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1966000
  custom_metrics: {}
  date: 2021-10-09_12-08-20
  done: false
  episode_len_mean: 372.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 4
  episodes_total: 5538
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.821553897857666
          entropy_coeff: 0.009999999999999998
          kl: 0.014469446366076019
          policy_loss: -0.08094109340260426
          total_loss: -0.08562229743434323
          vf_explained_var: -0.7288224697113037
          vf_loss: 0.0008335381946785169
    num_agent_steps_sampled: 1966000
    num_agent_steps_trained: 1966000
    num_steps_sampled: 1966000
    num_steps_trained: 1966000
  iterations_since_restore: 1966

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1966,49416.5,1966000,-0.12,0,-8,372.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1967000
  custom_metrics: {}
  date: 2021-10-09_12-08-44
  done: false
  episode_len_mean: 371.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 5540
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.8259077774153816
          entropy_coeff: 0.009999999999999998
          kl: 0.011414251840364464
          policy_loss: -0.05739931333810091
          total_loss: -0.06448256946686241
          vf_explained_var: -0.6525043845176697
          vf_loss: 0.0011567693035532203
    num_agent_steps_sampled: 1967000
    num_agent_steps_trained: 1967000
    num_steps_sampled: 1967000
    num_steps_trained: 1967000
  iterations_since_restore: 196

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1967,49440.3,1967000,-0.12,0,-8,371.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1968000
  custom_metrics: {}
  date: 2021-10-09_12-09-08
  done: false
  episode_len_mean: 369.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5543
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.8899018247922261
          entropy_coeff: 0.009999999999999998
          kl: 0.013671275295756185
          policy_loss: -0.0900052392648326
          total_loss: -0.09596727664271991
          vf_explained_var: -0.29984521865844727
          vf_loss: 0.0009367941867417863
    num_agent_steps_sampled: 1968000
    num_agent_steps_trained: 1968000
    num_steps_sampled: 1968000
    num_steps_trained: 1968000
  iterations_since_restore: 196

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1968,49465,1968000,-0.12,0,-8,369.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1969000
  custom_metrics: {}
  date: 2021-10-09_12-09-32
  done: false
  episode_len_mean: 368.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5546
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.929209257496728
          entropy_coeff: 0.009999999999999998
          kl: 0.01354571450351596
          policy_loss: -0.04497311111125681
          total_loss: -0.05179031172560321
          vf_explained_var: -1.0
          vf_loss: 0.000584915332405621
    num_agent_steps_sampled: 1969000
    num_agent_steps_trained: 1969000
    num_steps_sampled: 1969000
    num_steps_trained: 1969000
  iterations_since_restore: 1969
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1969,49488.6,1969000,-0.12,0,-8,368.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1970000
  custom_metrics: {}
  date: 2021-10-09_12-09-55
  done: false
  episode_len_mean: 368.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5549
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.7908336400985718
          entropy_coeff: 0.009999999999999998
          kl: 0.01644869245663957
          policy_loss: -0.03847887012072736
          total_loss: -0.04127506459545758
          vf_explained_var: -0.7206903100013733
          vf_loss: 0.0006740276038827789
    num_agent_steps_sampled: 1970000
    num_agent_steps_trained: 1970000
    num_steps_sampled: 1970000
    num_steps_trained: 1970000
  iterations_since_restore: 1970

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1970,49511.3,1970000,-0.12,0,-8,368.93




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1971000
  custom_metrics: {}
  date: 2021-10-09_12-10-32
  done: false
  episode_len_mean: 370.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 5551
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.6349102748764885
          entropy_coeff: 0.009999999999999998
          kl: 0.01200714757267423
          policy_loss: -0.057618632995420034
          total_loss: -0.0630222620235549
          vf_explained_var: -0.6065210103988647
          vf_loss: 0.0004060014965943992
    num_agent_steps_sampled: 1971000
    num_agent_steps_trained: 1971000
    num_steps_sampled: 1971000
    num_steps_trained: 1971000
  iterations_since_restore: 1971

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1971,49548.5,1971000,-0.12,0,-8,370.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1972000
  custom_metrics: {}
  date: 2021-10-09_12-10-56
  done: false
  episode_len_mean: 370.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5554
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.8639269762569004
          entropy_coeff: 0.009999999999999998
          kl: 0.0124874585104975
          policy_loss: -0.08994095996022225
          total_loss: -0.09706874729858504
          vf_explained_var: -0.6790918111801147
          vf_loss: 0.0005504100864830737
    num_agent_steps_sampled: 1972000
    num_agent_steps_trained: 1972000
    num_steps_sampled: 1972000
    num_steps_trained: 1972000
  iterations_since_restore: 1972


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1972,49572.1,1972000,-0.09,0,-8,370.11


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1973000
  custom_metrics: {}
  date: 2021-10-09_12-11-17
  done: false
  episode_len_mean: 371.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5557
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.90783621735043
          entropy_coeff: 0.009999999999999998
          kl: 0.012058666077223605
          policy_loss: -0.06766108282738262
          total_loss: -0.07557669271611506
          vf_explained_var: -0.9968888163566589
          vf_loss: 0.0005780567208098041
    num_agent_steps_sampled: 1973000
    num_agent_steps_trained: 1973000
    num_steps_sampled: 1973000
    num_steps_trained: 1973000
  iterations_since_restore: 1973


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1973,49593.8,1973000,-0.09,0,-8,371.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1974000
  custom_metrics: {}
  date: 2021-10-09_12-11-40
  done: false
  episode_len_mean: 371.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 5559
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.8484606583913168
          entropy_coeff: 0.009999999999999998
          kl: 0.01632231688797915
          policy_loss: -0.101593941801952
          total_loss: -0.10524940635595056
          vf_explained_var: -0.3664243817329407
          vf_loss: 0.0005019570304688791
    num_agent_steps_sampled: 1974000
    num_agent_steps_trained: 1974000
    num_steps_sampled: 1974000
    num_steps_trained: 1974000
  iterations_since_restore: 1974
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1974,49616.4,1974000,-0.09,0,-8,371.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1975000
  custom_metrics: {}
  date: 2021-10-09_12-12-05
  done: false
  episode_len_mean: 369.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5562
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.927897744708591
          entropy_coeff: 0.009999999999999998
          kl: 0.014914471922479243
          policy_loss: -0.09703356275955836
          total_loss: -0.10279810060229566
          vf_explained_var: -0.7106285691261292
          vf_loss: 0.0004230121892760508
    num_agent_steps_sampled: 1975000
    num_agent_steps_trained: 1975000
    num_steps_sampled: 1975000
    num_steps_trained: 1975000
  iterations_since_restore: 1975

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1975,49641.5,1975000,-0.09,0,-8,369.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1976000
  custom_metrics: {}
  date: 2021-10-09_12-12-28
  done: false
  episode_len_mean: 369.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5565
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.7573845598432754
          entropy_coeff: 0.009999999999999998
          kl: 0.012724941268984638
          policy_loss: -0.09023567291183604
          total_loss: -0.09628523325340616
          vf_explained_var: -0.5538390278816223
          vf_loss: 0.00035475473252720094
    num_agent_steps_sampled: 1976000
    num_agent_steps_trained: 1976000
    num_steps_sampled: 1976000
    num_steps_trained: 1976000
  iterations_since_restore: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1976,49664.4,1976000,-0.09,0,-8,369.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1977000
  custom_metrics: {}
  date: 2021-10-09_12-12-53
  done: false
  episode_len_mean: 368.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5568
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8777666896036566
          cur_lr: 5.000000000000001e-05
          entropy: 1.8362327178319295
          entropy_coeff: 0.009999999999999998
          kl: 0.0033591086601615634
          policy_loss: -0.24372249758905834
          total_loss: -0.2588237944576475
          vf_explained_var: -0.7936791181564331
          vf_loss: 0.0003125164534948352
    num_agent_steps_sampled: 1977000
    num_agent_steps_trained: 1977000
    num_steps_sampled: 1977000
    num_steps_trained: 1977000
  iterations_since_restore: 197

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1977,49689.8,1977000,-0.15,0,-8,368.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1978000
  custom_metrics: {}
  date: 2021-10-09_12-13-17
  done: false
  episode_len_mean: 368.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5571
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 1.9860298567348056
          entropy_coeff: 0.009999999999999998
          kl: 0.016642030002168164
          policy_loss: -0.10922268715997537
          total_loss: -0.1215058259665966
          vf_explained_var: -0.5948227643966675
          vf_loss: 0.00027324970295820904
    num_agent_steps_sampled: 1978000
    num_agent_steps_trained: 1978000
    num_steps_sampled: 1978000
    num_steps_trained: 1978000
  iterations_since_restore: 197

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1978,49713.7,1978000,-0.15,0,-8,368.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1979000
  custom_metrics: {}
  date: 2021-10-09_12-13-40
  done: false
  episode_len_mean: 369.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 5573
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 1.7406379646725125
          entropy_coeff: 0.009999999999999998
          kl: 0.01840284227517096
          policy_loss: -0.13790255518009265
          total_loss: -0.14690757060630455
          vf_explained_var: -0.9995673298835754
          vf_loss: 0.0003246624606415733
    num_agent_steps_sampled: 1979000
    num_agent_steps_trained: 1979000
    num_steps_sampled: 1979000
    num_steps_trained: 1979000
  iterations_since_restore: 1979

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1979,49735.9,1979000,-0.15,0,-8,369.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1980000
  custom_metrics: {}
  date: 2021-10-09_12-14-05
  done: false
  episode_len_mean: 369.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5576
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 1.9453522867626614
          entropy_coeff: 0.009999999999999998
          kl: 0.01414211239231133
          policy_loss: -0.014491823895110025
          total_loss: -0.027507391137381394
          vf_explained_var: -0.9887564778327942
          vf_loss: 0.00023121845863190377
    num_agent_steps_sampled: 1980000
    num_agent_steps_trained: 1980000
    num_steps_sampled: 1980000
    num_steps_trained: 1980000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1980,49761,1980000,-0.15,0,-8,369.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1981000
  custom_metrics: {}
  date: 2021-10-09_12-14-29
  done: false
  episode_len_mean: 369.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5579
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 1.8262767129474216
          entropy_coeff: 0.009999999999999998
          kl: 0.01947108868191875
          policy_loss: -0.1256257575419214
          total_loss: -0.13509416484998332
          vf_explained_var: -0.937052845954895
          vf_loss: 0.0002488235827412508
    num_agent_steps_sampled: 1981000
    num_agent_steps_trained: 1981000
    num_steps_sampled: 1981000
    num_steps_trained: 1981000
  iterations_since_restore: 1981
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1981,49784.9,1981000,-0.15,0,-8,369.12




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1982000
  custom_metrics: {}
  date: 2021-10-09_12-15-10
  done: false
  episode_len_mean: 368.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5582
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 1.9237346715397305
          entropy_coeff: 0.009999999999999998
          kl: 0.0166516466742134
          policy_loss: -0.08396974911706315
          total_loss: -0.09568242229935196
          vf_explained_var: -0.9953760504722595
          vf_loss: 0.0002165433559437386
    num_agent_steps_sampled: 1982000
    num_agent_steps_trained: 1982000
    num_steps_sampled: 1982000
    num_steps_trained: 1982000
  iterations_since_restore: 1982
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1982,49825.7,1982000,-0.15,0,-8,368.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1983000
  custom_metrics: {}
  date: 2021-10-09_12-15-34
  done: false
  episode_len_mean: 366.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5585
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 1.9067283696598476
          entropy_coeff: 0.009999999999999998
          kl: 0.011543291924284342
          policy_loss: -0.047997714051355916
          total_loss: -0.06180438446915812
          vf_explained_var: -0.9557004570960999
          vf_loss: 0.0001944545776091723
    num_agent_steps_sampled: 1983000
    num_agent_steps_trained: 1983000
    num_steps_sampled: 1983000
    num_steps_trained: 1983000
  iterations_since_restore: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1983,49850.4,1983000,-0.15,0,-8,366.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1984000
  custom_metrics: {}
  date: 2021-10-09_12-15-59
  done: false
  episode_len_mean: 365.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5588
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 1.885337652100457
          entropy_coeff: 0.009999999999999998
          kl: 0.021516393181099228
          policy_loss: -0.08660156652331352
          total_loss: -0.09584476744963064
          vf_explained_var: -0.6238231658935547
          vf_loss: 0.00016698696660265948
    num_agent_steps_sampled: 1984000
    num_agent_steps_trained: 1984000
    num_steps_sampled: 1984000
    num_steps_trained: 1984000
  iterations_since_restore: 198

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1984,49875.5,1984000,-0.15,0,-8,365.24


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1985000
  custom_metrics: {}
  date: 2021-10-09_12-16-28
  done: false
  episode_len_mean: 363.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5591
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.3906862888071272
          entropy_coeff: 0.009999999999999998
          kl: 0.015251982441554861
          policy_loss: -0.06537329488330418
          total_loss: -0.06909116895662414
          vf_explained_var: -0.0036432796623557806
          vf_loss: 0.00014822943101333092
    num_agent_steps_sampled: 1985000
    num_agent_steps_trained: 1985000
    num_steps_sampled: 1985000
    num_steps_trained: 1985000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1985,49904.4,1985000,-0.15,0,-8,363.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1986000
  custom_metrics: {}
  date: 2021-10-09_12-16-52
  done: false
  episode_len_mean: 362.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5594
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.884452153576745
          entropy_coeff: 0.009999999999999998
          kl: 0.016690438803112388
          policy_loss: -0.05762475803494453
          total_loss: -0.06535844288559424
          vf_explained_var: -0.7188370823860168
          vf_loss: 0.0001231018783275633
    num_agent_steps_sampled: 1986000
    num_agent_steps_trained: 1986000
    num_steps_sampled: 1986000
    num_steps_trained: 1986000
  iterations_since_restore: 1986

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1986,49928.2,1986000,-0.15,0,-8,362.14


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1987000
  custom_metrics: {}
  date: 2021-10-09_12-17-15
  done: false
  episode_len_mean: 360.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5597
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.9403224600685967
          entropy_coeff: 0.009999999999999998
          kl: 0.016638680472569344
          policy_loss: -0.10399788553929991
          total_loss: -0.11230530579470926
          vf_explained_var: -0.74505615234375
          vf_loss: 0.0001421465144731984
    num_agent_steps_sampled: 1987000
    num_agent_steps_trained: 1987000
    num_steps_sampled: 1987000
    num_steps_trained: 1987000
  iterations_since_restore: 1987


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1987,49951.1,1987000,-0.15,0,-8,360.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1988000
  custom_metrics: {}
  date: 2021-10-09_12-17-38
  done: false
  episode_len_mean: 360.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 5599
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.814727853404151
          entropy_coeff: 0.009999999999999998
          kl: 0.011495292694032204
          policy_loss: -0.12523206683496635
          total_loss: -0.1356905076238844
          vf_explained_var: -0.7911949157714844
          vf_loss: 0.00012119879655882767
    num_agent_steps_sampled: 1988000
    num_agent_steps_trained: 1988000
    num_steps_sampled: 1988000
    num_steps_trained: 1988000
  iterations_since_restore: 1988

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1988,49973.6,1988000,-0.15,0,-8,360.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1989000
  custom_metrics: {}
  date: 2021-10-09_12-18-01
  done: false
  episode_len_mean: 362.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5602
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.6558804008695813
          entropy_coeff: 0.009999999999999998
          kl: 0.009019607249868224
          policy_loss: -0.06744931120839384
          total_loss: -0.07769188887129228
          vf_explained_var: -0.252159982919693
          vf_loss: 0.00037838861212043817
    num_agent_steps_sampled: 1989000
    num_agent_steps_trained: 1989000
    num_steps_sampled: 1989000
    num_steps_trained: 1989000
  iterations_since_restore: 198

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1989,49997.2,1989000,-0.15,0,-8,362.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1990000
  custom_metrics: {}
  date: 2021-10-09_12-18-23
  done: false
  episode_len_mean: 361.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5605
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.825295873483022
          entropy_coeff: 0.009999999999999998
          kl: 0.014740933158390382
          policy_loss: -0.13940771967172622
          total_loss: -0.14778498611930344
          vf_explained_var: -0.7887616753578186
          vf_loss: 0.00017136713900577484
    num_agent_steps_sampled: 1990000
    num_agent_steps_trained: 1990000
    num_steps_sampled: 1990000
    num_steps_trained: 1990000
  iterations_since_restore: 199

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1990,50019.2,1990000,-0.15,0,-8,361.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1991000
  custom_metrics: {}
  date: 2021-10-09_12-18-46
  done: false
  episode_len_mean: 360.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 5607
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.8887629045380487
          entropy_coeff: 0.009999999999999998
          kl: 0.014053064186193827
          policy_loss: -0.02430531457066536
          total_loss: -0.033821191183394855
          vf_explained_var: -0.7112700343132019
          vf_loss: 0.00012026805327170425
    num_agent_steps_sampled: 1991000
    num_agent_steps_trained: 1991000
    num_steps_sampled: 1991000
    num_steps_trained: 1991000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1991,50042.4,1991000,-0.15,0,-8,360.11




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1992000
  custom_metrics: {}
  date: 2021-10-09_12-19-28
  done: false
  episode_len_mean: 356.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5610
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.6523869487974379
          entropy_coeff: 0.009999999999999998
          kl: 0.01319801451129155
          policy_loss: -0.05037367112106747
          total_loss: -0.05812143699990378
          vf_explained_var: -0.17889907956123352
          vf_loss: 8.752064030785631e-05
    num_agent_steps_sampled: 1992000
    num_agent_steps_trained: 1992000
    num_steps_sampled: 1992000
    num_steps_trained: 1992000
  iterations_since_restore: 199

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1992,50084.1,1992000,-0.14,0,-8,356.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1993000
  custom_metrics: {}
  date: 2021-10-09_12-19-52
  done: false
  episode_len_mean: 357.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 5613
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.697068460782369
          entropy_coeff: 0.009999999999999998
          kl: 0.013934494476115195
          policy_loss: -0.08821281455457211
          total_loss: -0.09590373221370908
          vf_explained_var: -0.5881301760673523
          vf_loss: 0.00010634031241352204
    num_agent_steps_sampled: 1993000
    num_agent_steps_trained: 1993000
    num_steps_sampled: 1993000
    num_steps_trained: 1993000
  iterations_since_restore: 199

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1993,50108.4,1993000,-0.14,0,-8,357.18


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1994000
  custom_metrics: {}
  date: 2021-10-09_12-20-14
  done: false
  episode_len_mean: 356.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5616
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.8601754943529765
          entropy_coeff: 0.009999999999999998
          kl: 0.015795633760603542
          policy_loss: -0.11793437231745985
          total_loss: -0.1260305751942926
          vf_explained_var: -0.42926907539367676
          vf_loss: 0.0001068920262292118
    num_agent_steps_sampled: 1994000
    num_agent_steps_trained: 1994000
    num_steps_sampled: 1994000
    num_steps_trained: 1994000
  iterations_since_restore: 199

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1994,50130.1,1994000,-0.06,0,-6,356.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1995000
  custom_metrics: {}
  date: 2021-10-09_12-20-36
  done: false
  episode_len_mean: 356.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 5618
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.9169013606177436
          entropy_coeff: 0.009999999999999998
          kl: 0.013704160315923955
          policy_loss: -0.13337069294518894
          total_loss: -0.1433888700273302
          vf_explained_var: -0.5957433581352234
          vf_loss: 0.0001290437079862588
    num_agent_steps_sampled: 1995000
    num_agent_steps_trained: 1995000
    num_steps_sampled: 1995000
    num_steps_trained: 1995000
  iterations_since_restore: 1995

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1995,50151.9,1995000,-0.06,0,-6,356.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1996000
  custom_metrics: {}
  date: 2021-10-09_12-20-59
  done: false
  episode_len_mean: 356.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5621
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.7472947941886054
          entropy_coeff: 0.009999999999999998
          kl: 0.022678798141453124
          policy_loss: -0.06685262094769213
          total_loss: -0.06929148178961542
          vf_explained_var: -0.15157510340213776
          vf_loss: 0.00010406404681917694
    num_agent_steps_sampled: 1996000
    num_agent_steps_trained: 1996000
    num_steps_sampled: 1996000
    num_steps_trained: 1996000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1996,50174.8,1996000,-0.06,0,-6,356.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1997000
  custom_metrics: {}
  date: 2021-10-09_12-21-20
  done: false
  episode_len_mean: 358.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5624
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8233797126346165
          entropy_coeff: 0.009999999999999998
          kl: 0.01253902041237759
          policy_loss: -0.08679197907654776
          total_loss: -0.09255778270049228
          vf_explained_var: -0.7512184977531433
          vf_loss: 8.586726301776556e-05
    num_agent_steps_sampled: 1997000
    num_agent_steps_trained: 1997000
    num_steps_sampled: 1997000
    num_steps_trained: 1997000
  iterations_since_restore: 1997

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1997,50196,1997000,-0.06,0,-6,358.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1998000
  custom_metrics: {}
  date: 2021-10-09_12-21-41
  done: false
  episode_len_mean: 359.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 5626
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8380653262138367
          entropy_coeff: 0.009999999999999998
          kl: 0.014738125852036828
          policy_loss: -0.0519325800653961
          total_loss: -0.05555744572646088
          vf_explained_var: -0.815466582775116
          vf_loss: 0.000202067129753737
    num_agent_steps_sampled: 1998000
    num_agent_steps_trained: 1998000
    num_steps_sampled: 1998000
    num_steps_trained: 1998000
  iterations_since_restore: 1998
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1998,50216.9,1998000,-0.06,0,-6,359.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 1999000
  custom_metrics: {}
  date: 2021-10-09_12-22-04
  done: false
  episode_len_mean: 359.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5629
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.6560738020473056
          entropy_coeff: 0.009999999999999998
          kl: 0.006245015398021023
          policy_loss: 0.002683037022749583
          total_loss: -0.0015044551756646899
          vf_explained_var: -0.46573173999786377
          vf_loss: 0.006206369507385211
    num_agent_steps_sampled: 1999000
    num_agent_steps_trained: 1999000
    num_steps_sampled: 1999000
    num_steps_trained: 1999000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,1999,50240,1999000,-0.07,0,-6,359.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2000000
  custom_metrics: {}
  date: 2021-10-09_12-22-28
  done: false
  episode_len_mean: 359.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5632
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.6562365783585442
          entropy_coeff: 0.009999999999999998
          kl: 0.011958403946929902
          policy_loss: -0.06984726885954538
          total_loss: -0.07307088391648399
          vf_explained_var: -0.1653352677822113
          vf_loss: 0.0015299754097617955
    num_agent_steps_sampled: 2000000
    num_agent_steps_trained: 2000000
    num_steps_sampled: 2000000
    num_steps_trained: 2000000
  iterations_since_restore: 2000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2000,50263.6,2000000,-0.07,0,-6,359


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2001000
  custom_metrics: {}
  date: 2021-10-09_12-22-50
  done: false
  episode_len_mean: 359.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 5634
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8516344335344104
          entropy_coeff: 0.009999999999999998
          kl: 0.012154418203304079
          policy_loss: -0.07304834918015533
          total_loss: -0.07836638076437843
          vf_explained_var: -0.12873171269893646
          vf_loss: 0.0011959756760107768
    num_agent_steps_sampled: 2001000
    num_agent_steps_trained: 2001000
    num_steps_sampled: 2001000
    num_steps_trained: 2001000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2001,50285.7,2001000,-0.07,0,-6,359.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2002000
  custom_metrics: {}
  date: 2021-10-09_12-23-11
  done: false
  episode_len_mean: 361.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5637
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7569140010409885
          entropy_coeff: 0.009999999999999998
          kl: 0.01400988724873999
          policy_loss: -0.041255442265214194
          total_loss: -0.044475319283083084
          vf_explained_var: -0.6263261437416077
          vf_loss: 0.000514674137876783
    num_agent_steps_sampled: 2002000
    num_agent_steps_trained: 2002000
    num_steps_sampled: 2002000
    num_steps_trained: 2002000
  iterations_since_restore: 200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2002,50307.2,2002000,-0.07,0,-6,361.69




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2003000
  custom_metrics: {}
  date: 2021-10-09_12-23-52
  done: false
  episode_len_mean: 362.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5640
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 2.0137259721755982
          entropy_coeff: 0.009999999999999998
          kl: 0.014893852814491106
          policy_loss: -0.10879349389837847
          total_loss: -0.11383328321907256
          vf_explained_var: -0.19956524670124054
          vf_loss: 0.0003899741097282256
    num_agent_steps_sampled: 2003000
    num_agent_steps_trained: 2003000
    num_steps_sampled: 2003000
    num_steps_trained: 2003000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2003,50347.3,2003000,-0.07,0,-6,362.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2004000
  custom_metrics: {}
  date: 2021-10-09_12-24-12
  done: false
  episode_len_mean: 364.41
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 5642
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7576370027330186
          entropy_coeff: 0.009999999999999998
          kl: 0.007308745958894154
          policy_loss: -0.05627592061128881
          total_loss: -0.048688389992134436
          vf_explained_var: -0.46855005621910095
          vf_loss: 0.01794660494520536
    num_agent_steps_sampled: 2004000
    num_agent_steps_trained: 2004000
    num_steps_sampled: 2004000
    num_steps_trained: 2004000
  iterations_since_restore: 200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2004,50368.2,2004000,-0.06,1,-6,364.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2005000
  custom_metrics: {}
  date: 2021-10-09_12-24-35
  done: false
  episode_len_mean: 364.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5645
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8718875898255243
          entropy_coeff: 0.009999999999999998
          kl: 0.011781488997754627
          policy_loss: -0.06954183185266125
          total_loss: -0.07490136155651675
          vf_explained_var: -0.5808384418487549
          vf_loss: 0.0017252737074159087
    num_agent_steps_sampled: 2005000
    num_agent_steps_trained: 2005000
    num_steps_sampled: 2005000
    num_steps_trained: 2005000
  iterations_since_restore: 2005

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2005,50391.2,2005000,-0.06,1,-6,364.9


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2006000
  custom_metrics: {}
  date: 2021-10-09_12-24-58
  done: false
  episode_len_mean: 365.95
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 5647
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.726134510835012
          entropy_coeff: 0.009999999999999998
          kl: 0.014836603578093922
          policy_loss: -0.10022606036315361
          total_loss: -0.10145471625857883
          vf_explained_var: -0.5063801407814026
          vf_loss: 0.0013817285620866136
    num_agent_steps_sampled: 2006000
    num_agent_steps_trained: 2006000
    num_steps_sampled: 2006000
    num_steps_trained: 2006000
  iterations_since_restore: 2006

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2006,50414.2,2006000,-0.06,1,-6,365.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2007000
  custom_metrics: {}
  date: 2021-10-09_12-25-20
  done: false
  episode_len_mean: 365.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5650
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.812605333328247
          entropy_coeff: 0.009999999999999998
          kl: 0.012563803325816438
          policy_loss: -0.10012648538168933
          total_loss: -0.10515531899614466
          vf_explained_var: -0.7115193605422974
          vf_loss: 0.0006906179543067184
    num_agent_steps_sampled: 2007000
    num_agent_steps_trained: 2007000
    num_steps_sampled: 2007000
    num_steps_trained: 2007000
  iterations_since_restore: 2007

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2007,50435.9,2007000,-0.06,1,-6,365.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2008000
  custom_metrics: {}
  date: 2021-10-09_12-25-42
  done: false
  episode_len_mean: 364.88
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5653
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.9713704744974772
          entropy_coeff: 0.009999999999999998
          kl: 0.011720244087462176
          policy_loss: -0.09930769461724494
          total_loss: -0.10691968266748719
          vf_explained_var: -0.9301977157592773
          vf_loss: 0.0005281202866980392
    num_agent_steps_sampled: 2008000
    num_agent_steps_trained: 2008000
    num_steps_sampled: 2008000
    num_steps_trained: 2008000
  iterations_since_restore: 200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2008,50458.1,2008000,-0.06,1,-6,364.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2009000
  custom_metrics: {}
  date: 2021-10-09_12-26-04
  done: false
  episode_len_mean: 366.44
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 5655
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7688380440076192
          entropy_coeff: 0.009999999999999998
          kl: 0.010684664133275342
          policy_loss: -0.04880904592573643
          total_loss: -0.05533608931841122
          vf_explained_var: -0.4727505147457123
          vf_loss: 0.0006103623792619652
    num_agent_steps_sampled: 2009000
    num_agent_steps_trained: 2009000
    num_steps_sampled: 2009000
    num_steps_trained: 2009000
  iterations_since_restore: 200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2009,50479.2,2009000,-0.06,1,-6,366.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2010000
  custom_metrics: {}
  date: 2021-10-09_12-26-27
  done: false
  episode_len_mean: 365.41
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.06
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5658
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.9758328133159213
          entropy_coeff: 0.009999999999999998
          kl: 0.0123613151812335
          policy_loss: -0.09011899514330758
          total_loss: -0.09741038204067283
          vf_explained_var: -0.9683566689491272
          vf_loss: 0.0002602953850226994
    num_agent_steps_sampled: 2010000
    num_agent_steps_trained: 2010000
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
  iterations_since_restore: 2010


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2010,50502.7,2010000,-0.06,1,-6,365.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2011000
  custom_metrics: {}
  date: 2021-10-09_12-26-49
  done: false
  episode_len_mean: 365.64
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.07
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 5660
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7422530916002061
          entropy_coeff: 0.009999999999999998
          kl: 0.006015595268450171
          policy_loss: -0.0042661193344328135
          total_loss: -0.008320175276862251
          vf_explained_var: -0.13638509809970856
          vf_loss: 0.007428154923642675
    num_agent_steps_sampled: 2011000
    num_agent_steps_trained: 2011000
    num_steps_sampled: 2011000
    num_steps_trained: 2011000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2011,50524.7,2011000,-0.07,1,-6,365.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2012000
  custom_metrics: {}
  date: 2021-10-09_12-27-12
  done: false
  episode_len_mean: 367.3
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.07
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5663
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.6888107895851134
          entropy_coeff: 0.009999999999999998
          kl: 0.013077241105987585
          policy_loss: -0.08187852423224184
          total_loss: -0.08463475762142075
          vf_explained_var: -0.010894040577113628
          vf_loss: 0.001218259560280583
    num_agent_steps_sampled: 2012000
    num_agent_steps_trained: 2012000
    num_steps_sampled: 2012000
    num_steps_trained: 2012000
  iterations_since_restore: 201

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2012,50547.2,2012000,-0.07,1,-6,367.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2013000
  custom_metrics: {}
  date: 2021-10-09_12-27-35
  done: false
  episode_len_mean: 367.39
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.07
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 5666
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8891282651159498
          entropy_coeff: 0.009999999999999998
          kl: 0.009635672490284462
          policy_loss: -0.05763453623900811
          total_loss: -0.06644749330977599
          vf_explained_var: -0.7690545916557312
          vf_loss: 0.0005632187252760762
    num_agent_steps_sampled: 2013000
    num_agent_steps_trained: 2013000
    num_steps_sampled: 2013000
    num_steps_trained: 2013000
  iterations_since_restore: 201

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2013,50571.1,2013000,-0.07,1,-6,367.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2014000
  custom_metrics: {}
  date: 2021-10-09_12-28-00
  done: false
  episode_len_mean: 366.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5669
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8357840922143724
          entropy_coeff: 0.009999999999999998
          kl: 0.014656493682717908
          policy_loss: -0.11405190771652593
          total_loss: -0.11749035774005784
          vf_explained_var: -0.05602703243494034
          vf_loss: 0.00044628694061733164
    num_agent_steps_sampled: 2014000
    num_agent_steps_trained: 2014000
    num_steps_sampled: 2014000
    num_steps_trained: 2014000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2014,50595.5,2014000,-0.01,1,-1,366.9




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2015000
  custom_metrics: {}
  date: 2021-10-09_12-28-41
  done: false
  episode_len_mean: 366.99
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5671
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8905036462677849
          entropy_coeff: 0.009999999999999998
          kl: 0.012651691651886152
          policy_loss: -0.06620146657029788
          total_loss: -0.07221055258479384
          vf_explained_var: -0.257269024848938
          vf_loss: 0.0004025605253344919
    num_agent_steps_sampled: 2015000
    num_agent_steps_trained: 2015000
    num_steps_sampled: 2015000
    num_steps_trained: 2015000
  iterations_since_restore: 2015

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2015,50636.6,2015000,-0.01,1,-1,366.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2016000
  custom_metrics: {}
  date: 2021-10-09_12-29-05
  done: false
  episode_len_mean: 365.48
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5674
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7135682503382366
          entropy_coeff: 0.009999999999999998
          kl: 0.011129552565104738
          policy_loss: -0.06861017536785867
          total_loss: -0.07449206411838531
          vf_explained_var: -0.4411073327064514
          vf_loss: 0.00026349753485798523
    num_agent_steps_sampled: 2016000
    num_agent_steps_trained: 2016000
    num_steps_sampled: 2016000
    num_steps_trained: 2016000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2016,50661,2016000,-0.01,1,-1,365.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2017000
  custom_metrics: {}
  date: 2021-10-09_12-29-28
  done: false
  episode_len_mean: 367.04
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5677
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.9326026426421272
          entropy_coeff: 0.009999999999999998
          kl: 0.01268612821337371
          policy_loss: -0.0547067802813318
          total_loss: -0.06126936342981126
          vf_explained_var: -0.26806673407554626
          vf_loss: 0.000236048665424783
    num_agent_steps_sampled: 2017000
    num_agent_steps_trained: 2017000
    num_steps_sampled: 2017000
    num_steps_trained: 2017000
  iterations_since_restore: 2017


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2017,50683.3,2017000,-0.01,1,-1,367.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2018000
  custom_metrics: {}
  date: 2021-10-09_12-29-51
  done: false
  episode_len_mean: 367.92
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5680
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8095858454704286
          entropy_coeff: 0.009999999999999998
          kl: 0.014787164109525363
          policy_loss: -0.053213548991415234
          total_loss: -0.05648126457300451
          vf_explained_var: -0.7749814987182617
          vf_loss: 0.00022600014635827393
    num_agent_steps_sampled: 2018000
    num_agent_steps_trained: 2018000
    num_steps_sampled: 2018000
    num_steps_trained: 2018000
  iterations_since_restore: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2018,50706,2018000,-0.01,1,-1,367.92


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2019000
  custom_metrics: {}
  date: 2021-10-09_12-30-12
  done: false
  episode_len_mean: 368.29
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5682
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.5283239550060697
          entropy_coeff: 0.009999999999999998
          kl: 0.012111834875182638
          policy_loss: -0.10134678908313315
          total_loss: -0.1044964750814769
          vf_explained_var: -0.26900702714920044
          vf_loss: 0.00017326662467288164
    num_agent_steps_sampled: 2019000
    num_agent_steps_trained: 2019000
    num_steps_sampled: 2019000
    num_steps_trained: 2019000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2019,50727.1,2019000,-0.01,1,-1,368.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2020000
  custom_metrics: {}
  date: 2021-10-09_12-30-32
  done: false
  episode_len_mean: 370.47
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5684
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7528645820087856
          entropy_coeff: 0.009999999999999998
          kl: 0.0133339211072601
          policy_loss: -0.06159581767602099
          total_loss: -0.0657296909019351
          vf_explained_var: -0.7551716566085815
          vf_loss: 0.0002276889741349603
    num_agent_steps_sampled: 2020000
    num_agent_steps_trained: 2020000
    num_steps_sampled: 2020000
    num_steps_trained: 2020000
  iterations_since_restore: 2020
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2020,50747.7,2020000,-0.01,1,-1,370.47


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2021000
  custom_metrics: {}
  date: 2021-10-09_12-30-55
  done: false
  episode_len_mean: 371.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5687
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8526337689823575
          entropy_coeff: 0.009999999999999998
          kl: 0.014583038131026961
          policy_loss: -0.09861165831486383
          total_loss: -0.1026265992058648
          vf_explained_var: -0.42304784059524536
          vf_loss: 0.00011082842170758845
    num_agent_steps_sampled: 2021000
    num_agent_steps_trained: 2021000
    num_steps_sampled: 2021000
    num_steps_trained: 2021000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2021,50770.4,2021000,-0.01,1,-1,371.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2022000
  custom_metrics: {}
  date: 2021-10-09_12-31-17
  done: false
  episode_len_mean: 374.48
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5690
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.845025004280938
          entropy_coeff: 0.009999999999999998
          kl: 0.013750982135319528
          policy_loss: -0.10135789720548524
          total_loss: -0.10614293317000072
          vf_explained_var: -0.7463623285293579
          vf_loss: 8.628994433416261e-05
    num_agent_steps_sampled: 2022000
    num_agent_steps_trained: 2022000
    num_steps_sampled: 2022000
    num_steps_trained: 2022000
  iterations_since_restore: 2022

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2022,50792.5,2022000,-0.01,1,-1,374.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2023000
  custom_metrics: {}
  date: 2021-10-09_12-31-38
  done: false
  episode_len_mean: 376.15
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5692
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.791726913717058
          entropy_coeff: 0.009999999999999998
          kl: 0.011890119346535775
          policy_loss: -0.11925564511782594
          total_loss: -0.1252156012588077
          vf_explained_var: -0.8264185786247253
          vf_loss: 0.00021596647558100004
    num_agent_steps_sampled: 2023000
    num_agent_steps_trained: 2023000
    num_steps_sampled: 2023000
    num_steps_trained: 2023000
  iterations_since_restore: 2023

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2023,50813.3,2023000,-0.01,1,-1,376.15


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2024000
  custom_metrics: {}
  date: 2021-10-09_12-32-00
  done: false
  episode_len_mean: 377.52
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5695
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.976378651460012
          entropy_coeff: 0.009999999999999998
          kl: 0.011399233640359214
          policy_loss: -0.07454894710746077
          total_loss: -0.08299576518022352
          vf_explained_var: -0.5060751438140869
          vf_loss: 6.036647217923827e-05
    num_agent_steps_sampled: 2024000
    num_agent_steps_trained: 2024000
    num_steps_sampled: 2024000
    num_steps_trained: 2024000
  iterations_since_restore: 2024

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2024,50835.3,2024000,-0.01,1,-1,377.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2025000
  custom_metrics: {}
  date: 2021-10-09_12-32-26
  done: false
  episode_len_mean: 375.76
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5698
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8225466410319011
          entropy_coeff: 0.009999999999999998
          kl: 0.013377385757979334
          policy_loss: -0.08337961801638206
          total_loss: -0.08834741144544549
          vf_explained_var: -0.5203840136528015
          vf_loss: 4.7666892108585064e-05
    num_agent_steps_sampled: 2025000
    num_agent_steps_trained: 2025000
    num_steps_sampled: 2025000
    num_steps_trained: 2025000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2025,50861.1,2025000,-0.01,1,-1,375.76




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2026000
  custom_metrics: {}
  date: 2021-10-09_12-33-07
  done: false
  episode_len_mean: 375.43
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5701
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7804693698883056
          entropy_coeff: 0.009999999999999998
          kl: 0.01096200008112681
          policy_loss: -0.0817012975923717
          total_loss: -0.08863366860896349
          vf_explained_var: -0.9809677004814148
          vf_loss: 4.7484242309615686e-05
    num_agent_steps_sampled: 2026000
    num_agent_steps_trained: 2026000
    num_steps_sampled: 2026000
    num_steps_trained: 2026000
  iterations_since_restore: 2026

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2026,50902.4,2026000,-0.01,1,-1,375.43


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2027000
  custom_metrics: {}
  date: 2021-10-09_12-33-30
  done: false
  episode_len_mean: 376.12
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5703
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7816889855596754
          entropy_coeff: 0.009999999999999998
          kl: 0.013224748165117263
          policy_loss: -0.07232284172334605
          total_loss: -0.07702979102937711
          vf_explained_var: -0.7435353398323059
          vf_loss: 5.066562474288124e-05
    num_agent_steps_sampled: 2027000
    num_agent_steps_trained: 2027000
    num_steps_sampled: 2027000
    num_steps_trained: 2027000
  iterations_since_restore: 202

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2027,50925.2,2027000,-0.01,1,-1,376.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2028000
  custom_metrics: {}
  date: 2021-10-09_12-33-52
  done: false
  episode_len_mean: 375.72
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5706
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8470142470465767
          entropy_coeff: 0.009999999999999998
          kl: 0.012659258822660193
          policy_loss: -0.14800470790101422
          total_loss: -0.15392476646229625
          vf_explained_var: -0.38336431980133057
          vf_loss: 4.922379927544777e-05
    num_agent_steps_sampled: 2028000
    num_agent_steps_trained: 2028000
    num_steps_sampled: 2028000
    num_steps_trained: 2028000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2028,50946.9,2028000,-0.01,1,-1,375.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2029000
  custom_metrics: {}
  date: 2021-10-09_12-34-14
  done: false
  episode_len_mean: 375.36
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5708
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8483118587070042
          entropy_coeff: 0.009999999999999998
          kl: 0.009672975388597432
          policy_loss: -0.10110270813521412
          total_loss: -0.10999327552401357
          vf_explained_var: -0.5489934086799622
          vf_loss: 4.060476795631176e-05
    num_agent_steps_sampled: 2029000
    num_agent_steps_trained: 2029000
    num_steps_sampled: 2029000
    num_steps_trained: 2029000
  iterations_since_restore: 202

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2029,50968.8,2029000,-0.01,1,-1,375.36


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2030000
  custom_metrics: {}
  date: 2021-10-09_12-34-35
  done: false
  episode_len_mean: 377.95
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5711
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7864477528466118
          entropy_coeff: 0.009999999999999998
          kl: 0.011044154379181235
          policy_loss: -0.13228893131017685
          total_loss: -0.1392224003871282
          vf_explained_var: -0.957585334777832
          vf_loss: 2.504266570516241e-05
    num_agent_steps_sampled: 2030000
    num_agent_steps_trained: 2030000
    num_steps_sampled: 2030000
    num_steps_trained: 2030000
  iterations_since_restore: 2030


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2030,50990,2030000,-0.01,1,-1,377.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2031000
  custom_metrics: {}
  date: 2021-10-09_12-34-58
  done: false
  episode_len_mean: 378.69
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5714
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8553775760862563
          entropy_coeff: 0.009999999999999998
          kl: 0.007189758049391405
          policy_loss: -0.06869560250391563
          total_loss: -0.08012056120981773
          vf_explained_var: -0.6731852889060974
          vf_loss: 2.9019699943293945e-05
    num_agent_steps_sampled: 2031000
    num_agent_steps_trained: 2031000
    num_steps_sampled: 2031000
    num_steps_trained: 2031000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2031,51013.1,2031000,-0.01,1,-1,378.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2032000
  custom_metrics: {}
  date: 2021-10-09_12-35-19
  done: false
  episode_len_mean: 378.99
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5716
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8969502157635159
          entropy_coeff: 0.009999999999999998
          kl: 0.009289670176110024
          policy_loss: -0.06543547475917472
          total_loss: -0.07518014632579353
          vf_explained_var: -0.49783915281295776
          vf_loss: 5.139748484200229e-05
    num_agent_steps_sampled: 2032000
    num_agent_steps_trained: 2032000
    num_steps_sampled: 2032000
    num_steps_trained: 2032000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2032,51034.4,2032000,-0.01,1,-1,378.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2033000
  custom_metrics: {}
  date: 2021-10-09_12-35-42
  done: false
  episode_len_mean: 378.6
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5719
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8452058951059978
          entropy_coeff: 0.009999999999999998
          kl: 0.01086050008250798
          policy_loss: -0.03775682479350103
          total_loss: -0.04545318019679851
          vf_explained_var: -0.7728259563446045
          vf_loss: 3.109479271693595e-05
    num_agent_steps_sampled: 2033000
    num_agent_steps_trained: 2033000
    num_steps_sampled: 2033000
    num_steps_trained: 2033000
  iterations_since_restore: 2033


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2033,51057.5,2033000,-0.01,1,-1,378.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2034000
  custom_metrics: {}
  date: 2021-10-09_12-36-04
  done: false
  episode_len_mean: 379.4
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 5721
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7337529460589092
          entropy_coeff: 0.009999999999999998
          kl: 0.01172453698678064
          policy_loss: -0.08933594594192173
          total_loss: -0.09507349420131908
          vf_explained_var: -0.3839508891105652
          vf_loss: 2.214947123017434e-05
    num_agent_steps_sampled: 2034000
    num_agent_steps_trained: 2034000
    num_steps_sampled: 2034000
    num_steps_trained: 2034000
  iterations_since_restore: 2034


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2034,51079.6,2034000,-0.01,1,-1,379.4


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2035000
  custom_metrics: {}
  date: 2021-10-09_12-36-29
  done: false
  episode_len_mean: 378.02
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5724
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.904777138763004
          entropy_coeff: 0.009999999999999998
          kl: 0.012046649051435902
          policy_loss: -0.09957333025005129
          total_loss: -0.10670990405811204
          vf_explained_var: -0.8264409303665161
          vf_loss: 1.528194451970598e-05
    num_agent_steps_sampled: 2035000
    num_agent_steps_trained: 2035000
    num_steps_sampled: 2035000
    num_steps_trained: 2035000
  iterations_since_restore: 2035

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2035,51104.5,2035000,-0.01,1,-1,378.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2036000
  custom_metrics: {}
  date: 2021-10-09_12-36-52
  done: false
  episode_len_mean: 376.53
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5727
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.6284319758415222
          entropy_coeff: 0.009999999999999998
          kl: 0.011900102737727814
          policy_loss: -0.06959946458745334
          total_loss: -0.07404829100188282
          vf_explained_var: -0.9995319843292236
          vf_loss: 8.429072341742641e-05
    num_agent_steps_sampled: 2036000
    num_agent_steps_trained: 2036000
    num_steps_sampled: 2036000
    num_steps_trained: 2036000
  iterations_since_restore: 2036


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2036,51126.7,2036000,0,1,-1,376.53




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2037000
  custom_metrics: {}
  date: 2021-10-09_12-37-32
  done: false
  episode_len_mean: 376.42
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 5730
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.650768646928999
          entropy_coeff: 0.009999999999999998
          kl: 0.012338375119393103
          policy_loss: -0.12365159317851067
          total_loss: -0.12783018856619796
          vf_explained_var: -0.9369844794273376
          vf_loss: 0.00014509946039551223
    num_agent_steps_sampled: 2037000
    num_agent_steps_trained: 2037000
    num_steps_sampled: 2037000
    num_steps_trained: 2037000
  iterations_since_restore: 2037


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2037,51167.1,2037000,0,1,-1,376.42


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2038000
  custom_metrics: {}
  date: 2021-10-09_12-37-54
  done: false
  episode_len_mean: 376.51
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 5732
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7800921347406176
          entropy_coeff: 0.009999999999999998
          kl: 0.009747871101982379
          policy_loss: -0.04904198240902689
          total_loss: 0.04176838170323107
          vf_explained_var: -0.488058477640152
          vf_loss: 0.0989853873004904
    num_agent_steps_sampled: 2038000
    num_agent_steps_trained: 2038000
    num_steps_sampled: 2038000
    num_steps_trained: 2038000
  iterations_since_restore: 2038
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2038,51188.8,2038000,-0.02,1,-2,376.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2039000
  custom_metrics: {}
  date: 2021-10-09_12-38-15
  done: false
  episode_len_mean: 378.16
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 5735
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8542791287104288
          entropy_coeff: 0.009999999999999998
          kl: 0.01182506311056229
          policy_loss: -0.05951621658686135
          total_loss: -0.06210274818456835
          vf_explained_var: -0.630979597568512
          vf_loss: 0.004279156095193078
    num_agent_steps_sampled: 2039000
    num_agent_steps_trained: 2039000
    num_steps_sampled: 2039000
    num_steps_trained: 2039000
  iterations_since_restore: 2039
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2039,51210.4,2039000,-0.02,1,-2,378.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2040000
  custom_metrics: {}
  date: 2021-10-09_12-38-39
  done: false
  episode_len_mean: 377.75
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 5738
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.4449694249365064
          entropy_coeff: 0.009999999999999998
          kl: 0.011763111431674704
          policy_loss: -0.11901031012336413
          total_loss: -0.11457728797362911
          vf_explained_var: -0.10943351686000824
          vf_loss: 0.007266790254248513
    num_agent_steps_sampled: 2040000
    num_agent_steps_trained: 2040000
    num_steps_sampled: 2040000
    num_steps_trained: 2040000
  iterations_since_restore: 204

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2040,51234.3,2040000,-0.02,1,-2,377.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2041000
  custom_metrics: {}
  date: 2021-10-09_12-39-03
  done: false
  episode_len_mean: 377.66
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 5740
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.7318931923972236
          entropy_coeff: 0.009999999999999998
          kl: 0.010677639185533631
          policy_loss: -0.12205935907032754
          total_loss: -0.12571501268280877
          vf_explained_var: -0.6632877588272095
          vf_loss: 0.0031192414979967806
    num_agent_steps_sampled: 2041000
    num_agent_steps_trained: 2041000
    num_steps_sampled: 2041000
    num_steps_trained: 2041000
  iterations_since_restore: 204

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2041,51257.9,2041000,-0.02,1,-2,377.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2042000
  custom_metrics: {}
  date: 2021-10-09_12-39-20
  done: false
  episode_len_mean: 377.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 5742
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.5819920341173808
          entropy_coeff: 0.009999999999999998
          kl: 0.014032288681423723
          policy_loss: -0.07442378298276
          total_loss: -0.07279596020364099
          vf_explained_var: -0.12429942935705185
          vf_loss: 0.0035910350542205077
    num_agent_steps_sampled: 2042000
    num_agent_steps_trained: 2042000
    num_steps_sampled: 2042000
    num_steps_trained: 2042000
  iterations_since_restore: 2042
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2042,51275,2042000,-0.03,0,-2,377.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2043000
  custom_metrics: {}
  date: 2021-10-09_12-39-42
  done: false
  episode_len_mean: 379.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 5745
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8057987689971924
          entropy_coeff: 0.009999999999999998
          kl: 0.011361099899830526
          policy_loss: -0.10445069608589014
          total_loss: -0.10864082713507944
          vf_explained_var: -0.5343583822250366
          vf_loss: 0.0026489125836330154
    num_agent_steps_sampled: 2043000
    num_agent_steps_trained: 2043000
    num_steps_sampled: 2043000
    num_steps_trained: 2043000
  iterations_since_restore: 204

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2043,51297,2043000,-0.03,0,-2,379.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2044000
  custom_metrics: {}
  date: 2021-10-09_12-40-03
  done: false
  episode_len_mean: 379.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 5747
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8682182497448392
          entropy_coeff: 0.009999999999999998
          kl: 0.012729657263485785
          policy_loss: -0.09517078817718559
          total_loss: -0.09970111259155803
          vf_explained_var: -0.5967465043067932
          vf_loss: 0.0015814778679567906
    num_agent_steps_sampled: 2044000
    num_agent_steps_trained: 2044000
    num_steps_sampled: 2044000
    num_steps_trained: 2044000
  iterations_since_restore: 204

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2044,51318.4,2044000,-0.03,0,-2,379.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2045000
  custom_metrics: {}
  date: 2021-10-09_12-40-24
  done: false
  episode_len_mean: 381.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 5750
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.6376112937927245
          entropy_coeff: 0.009999999999999998
          kl: 0.014661669944393664
          policy_loss: -0.08966008335765865
          total_loss: -0.08976177449027697
          vf_explained_var: -0.5070441961288452
          vf_loss: 0.001796203827123261
    num_agent_steps_sampled: 2045000
    num_agent_steps_trained: 2045000
    num_steps_sampled: 2045000
    num_steps_trained: 2045000
  iterations_since_restore: 2045

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2045,51338.9,2045000,-0.03,0,-2,381.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2046000
  custom_metrics: {}
  date: 2021-10-09_12-40-46
  done: false
  episode_len_mean: 380.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 5752
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.479072830412123
          entropy_coeff: 0.009999999999999998
          kl: 0.011769791436236925
          policy_loss: -0.11187751529117425
          total_loss: -0.1134526631070508
          vf_explained_var: -0.4314938485622406
          vf_loss: 0.0015930593388879465
    num_agent_steps_sampled: 2046000
    num_agent_steps_trained: 2046000
    num_steps_sampled: 2046000
    num_steps_trained: 2046000
  iterations_since_restore: 2046


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2046,51360.8,2046000,-0.03,0,-2,380.48


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2047000
  custom_metrics: {}
  date: 2021-10-09_12-41-07
  done: false
  episode_len_mean: 381.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 5755
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8819522261619568
          entropy_coeff: 0.009999999999999998
          kl: 0.013693065078263287
          policy_loss: -0.045249896765583095
          total_loss: -0.04865284363428752
          vf_explained_var: -0.8122304081916809
          vf_loss: 0.0018948425002033925
    num_agent_steps_sampled: 2047000
    num_agent_steps_trained: 2047000
    num_steps_sampled: 2047000
    num_steps_trained: 2047000
  iterations_since_restore: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2047,51381.8,2047000,-0.03,0,-2,381.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2048000
  custom_metrics: {}
  date: 2021-10-09_12-41-28
  done: false
  episode_len_mean: 382.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 5757
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.836832877000173
          entropy_coeff: 0.009999999999999998
          kl: 0.012405664266283159
          policy_loss: -0.10997615704933802
          total_loss: -0.11514466123448477
          vf_explained_var: -0.925048291683197
          vf_loss: 0.000949384617463996
    num_agent_steps_sampled: 2048000
    num_agent_steps_trained: 2048000
    num_steps_sampled: 2048000
    num_steps_trained: 2048000
  iterations_since_restore: 2048
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2048,51402.8,2048000,-0.03,0,-2,382.37




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2049000
  custom_metrics: {}
  date: 2021-10-09_12-42-07
  done: false
  episode_len_mean: 381.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 5760
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8175850868225099
          entropy_coeff: 0.009999999999999998
          kl: 0.01631042821283566
          policy_loss: -0.10608623979820145
          total_loss: -0.10721097634070449
          vf_explained_var: -0.28554290533065796
          vf_loss: 0.0009447678842762899
    num_agent_steps_sampled: 2049000
    num_agent_steps_trained: 2049000
    num_steps_sampled: 2049000
    num_steps_trained: 2049000
  iterations_since_restore: 204

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2049,51441.4,2049000,-0.02,0,-2,381.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2050000
  custom_metrics: {}
  date: 2021-10-09_12-42-28
  done: false
  episode_len_mean: 381.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 5762
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.938770474327935
          entropy_coeff: 0.009999999999999998
          kl: 0.013518711912545318
          policy_loss: -0.07896214086148474
          total_loss: -0.08425194598320458
          vf_explained_var: -0.8283137083053589
          vf_loss: 0.0007483391763849391
    num_agent_steps_sampled: 2050000
    num_agent_steps_trained: 2050000
    num_steps_sampled: 2050000
    num_steps_trained: 2050000
  iterations_since_restore: 2050

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2050,51462.9,2050000,-0.02,0,-2,381.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2051000
  custom_metrics: {}
  date: 2021-10-09_12-42-50
  done: false
  episode_len_mean: 382.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 5765
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8671265959739685
          entropy_coeff: 0.009999999999999998
          kl: 0.012773950676331698
          policy_loss: -0.10502919577476051
          total_loss: -0.11055765739745564
          vf_explained_var: -0.9502900242805481
          vf_loss: 0.0005286864868442839
    num_agent_steps_sampled: 2051000
    num_agent_steps_trained: 2051000
    num_steps_sampled: 2051000
    num_steps_trained: 2051000
  iterations_since_restore: 205

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2051,51485.1,2051000,-0.02,0,-2,382.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2052000
  custom_metrics: {}
  date: 2021-10-09_12-43-11
  done: false
  episode_len_mean: 383.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 5767
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.9084938539399041
          entropy_coeff: 0.009999999999999998
          kl: 0.007627692604806846
          policy_loss: -0.07208869800799422
          total_loss: -0.061149670142266487
          vf_explained_var: -0.49521464109420776
          vf_loss: 0.02249171351108493
    num_agent_steps_sampled: 2052000
    num_agent_steps_trained: 2052000
    num_steps_sampled: 2052000
    num_steps_trained: 2052000
  iterations_since_restore: 205

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2052,51505.3,2052000,-0.01,1,-2,383.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2053000
  custom_metrics: {}
  date: 2021-10-09_12-43-32
  done: false
  episode_len_mean: 385.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 5770
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.8437383479542202
          entropy_coeff: 0.009999999999999998
          kl: 0.014433204887732585
          policy_loss: -0.11502337414357397
          total_loss: -0.11702947500679228
          vf_explained_var: -0.3019748628139496
          vf_loss: 0.0021786740463640955
    num_agent_steps_sampled: 2053000
    num_agent_steps_trained: 2053000
    num_steps_sampled: 2053000
    num_steps_trained: 2053000
  iterations_since_restore: 205

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2053,51527.1,2053000,-0.01,1,-2,385.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2054000
  custom_metrics: {}
  date: 2021-10-09_12-43-53
  done: false
  episode_len_mean: 386.16
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 5772
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.9701302170753479
          entropy_coeff: 0.009999999999999998
          kl: 0.012145621450007128
          policy_loss: -0.10107903716464837
          total_loss: -0.10725577908257643
          vf_explained_var: -0.5437471866607666
          vf_loss: 0.0015309106590898915
    num_agent_steps_sampled: 2054000
    num_agent_steps_trained: 2054000
    num_steps_sampled: 2054000
    num_steps_trained: 2054000
  iterations_since_restore: 205

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2054,51548.1,2054000,-0.01,1,-2,386.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2055000
  custom_metrics: {}
  date: 2021-10-09_12-44-17
  done: false
  episode_len_mean: 387.37
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5775
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 1.559263935354021
          entropy_coeff: 0.009999999999999998
          kl: 0.031018305387040337
          policy_loss: -0.009402890337838067
          total_loss: 0.17269020477930705
          vf_explained_var: -0.472915381193161
          vf_loss: 0.16705554054480873
    num_agent_steps_sampled: 2055000
    num_agent_steps_trained: 2055000
    num_steps_sampled: 2055000
    num_steps_trained: 2055000
  iterations_since_restore: 2055
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2055,51571.3,2055000,-0.08,1,-7,387.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2056000
  custom_metrics: {}
  date: 2021-10-09_12-44-38
  done: false
  episode_len_mean: 387.95
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5777
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.989099054866367
          entropy_coeff: 0.009999999999999998
          kl: 0.009137103175925176
          policy_loss: -0.05457560399340259
          total_loss: -0.04747199695557356
          vf_explained_var: -0.1162208840250969
          vf_loss: 0.01346043361764815
    num_agent_steps_sampled: 2056000
    num_agent_steps_trained: 2056000
    num_steps_sampled: 2056000
    num_steps_trained: 2056000
  iterations_since_restore: 2056
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2056,51592.7,2056000,-0.08,1,-7,387.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2057000
  custom_metrics: {}
  date: 2021-10-09_12-44-59
  done: false
  episode_len_mean: 388.6
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5780
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.9040249268213907
          entropy_coeff: 0.009999999999999998
          kl: 0.00857532899236525
          policy_loss: -0.12022446199423736
          total_loss: -0.12172605722314782
          vf_explained_var: -0.46246618032455444
          vf_loss: 0.004836606602960577
    num_agent_steps_sampled: 2057000
    num_agent_steps_trained: 2057000
    num_steps_sampled: 2057000
    num_steps_trained: 2057000
  iterations_since_restore: 2057
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2057,51613.7,2057000,-0.08,1,-7,388.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2058000
  custom_metrics: {}
  date: 2021-10-09_12-45-21
  done: false
  episode_len_mean: 388.56
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5782
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.869678787390391
          entropy_coeff: 0.009999999999999998
          kl: 0.011181023691801389
          policy_loss: -0.1326150473828117
          total_loss: -0.13002208140161303
          vf_explained_var: -0.5279316306114197
          vf_loss: 0.004728071916744941
    num_agent_steps_sampled: 2058000
    num_agent_steps_trained: 2058000
    num_steps_sampled: 2058000
    num_steps_trained: 2058000
  iterations_since_restore: 2058
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2058,51635.7,2058000,-0.08,1,-7,388.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2059000
  custom_metrics: {}
  date: 2021-10-09_12-45-42
  done: false
  episode_len_mean: 388.79
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5785
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.9331870436668397
          entropy_coeff: 0.009999999999999998
          kl: 0.00977406603019458
          policy_loss: -0.05566326868202951
          total_loss: -0.05723932022228837
          vf_explained_var: -0.5934510827064514
          vf_loss: 0.0032781658202616706
    num_agent_steps_sampled: 2059000
    num_agent_steps_trained: 2059000
    num_steps_sampled: 2059000
    num_steps_trained: 2059000
  iterations_since_restore: 2059
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2059,51656.3,2059000,-0.08,1,-7,388.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2060000
  custom_metrics: {}
  date: 2021-10-09_12-46-03
  done: false
  episode_len_mean: 389.44
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5787
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.816780561870999
          entropy_coeff: 0.009999999999999998
          kl: 0.010807990836369423
          policy_loss: -0.061598467371530005
          total_loss: -0.06106168969223897
          vf_explained_var: -0.3305671513080597
          vf_loss: 0.0026954498297224443
    num_agent_steps_sampled: 2060000
    num_agent_steps_trained: 2060000
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
  iterations_since_restore: 2060


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2060,51677.9,2060000,-0.08,1,-7,389.44




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2061000
  custom_metrics: {}
  date: 2021-10-09_12-46-43
  done: false
  episode_len_mean: 388.29
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5790
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8467997272809347
          entropy_coeff: 0.009999999999999998
          kl: 0.009052269709030369
          policy_loss: -0.13142488412559034
          total_loss: -0.1345793298135201
          vf_explained_var: -0.8309382796287537
          vf_loss: 0.001905044166293616
    num_agent_steps_sampled: 2061000
    num_agent_steps_trained: 2061000
    num_steps_sampled: 2061000
    num_steps_trained: 2061000
  iterations_since_restore: 2061
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2061,51717.8,2061000,-0.08,1,-7,388.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2062000
  custom_metrics: {}
  date: 2021-10-09_12-47-09
  done: false
  episode_len_mean: 386.8
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5793
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8622460577223037
          entropy_coeff: 0.009999999999999998
          kl: 0.012531218343013843
          policy_loss: -0.07392392274406222
          total_loss: -0.0717318984783358
          vf_explained_var: -0.5561681389808655
          vf_loss: 0.0022528504189621244
    num_agent_steps_sampled: 2062000
    num_agent_steps_trained: 2062000
    num_steps_sampled: 2062000
    num_steps_trained: 2062000
  iterations_since_restore: 2062
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2062,51743.2,2062000,-0.08,1,-7,386.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2063000
  custom_metrics: {}
  date: 2021-10-09_12-47-31
  done: false
  episode_len_mean: 387.68
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5795
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8883993320994907
          entropy_coeff: 0.009999999999999998
          kl: 0.007348257039898027
          policy_loss: -0.0620734642777178
          total_loss: -0.0679431060122119
          vf_explained_var: -1.0
          vf_loss: 0.002129882305032677
    num_agent_steps_sampled: 2063000
    num_agent_steps_trained: 2063000
    num_steps_sampled: 2063000
    num_steps_trained: 2063000
  iterations_since_restore: 2063
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2063,51765.1,2063000,-0.08,1,-7,387.68


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2064000
  custom_metrics: {}
  date: 2021-10-09_12-47-55
  done: false
  episode_len_mean: 389.96
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.08
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5798
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.9947834293047586
          entropy_coeff: 0.009999999999999998
          kl: 0.009206526609528174
          policy_loss: -0.13931324895885255
          total_loss: -0.1441319838580158
          vf_explained_var: -1.0
          vf_loss: 0.0014921033567386784
    num_agent_steps_sampled: 2064000
    num_agent_steps_trained: 2064000
    num_steps_sampled: 2064000
    num_steps_trained: 2064000
  iterations_since_restore: 2064
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2064,51789,2064000,-0.08,1,-7,389.96


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2065000
  custom_metrics: {}
  date: 2021-10-09_12-48-17
  done: false
  episode_len_mean: 391.58
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5800
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.7333061496416728
          entropy_coeff: 0.009999999999999998
          kl: 0.009710227620742076
          policy_loss: -0.024610101348823972
          total_loss: 0.18595535796549584
          vf_explained_var: 0.023957334458827972
          vf_loss: 0.21351542678247723
    num_agent_steps_sampled: 2065000
    num_agent_steps_trained: 2065000
    num_steps_sampled: 2065000
    num_steps_trained: 2065000
  iterations_since_restore: 2065
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2065,51811.5,2065000,-0.12,1,-7,391.58


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2066000
  custom_metrics: {}
  date: 2021-10-09_12-48-41
  done: false
  episode_len_mean: 391.81
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5803
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.7948143667644925
          entropy_coeff: 0.009999999999999998
          kl: 0.011950805272198689
          policy_loss: -0.10580782801326778
          total_loss: -0.09596493987159596
          vf_explained_var: -0.4797576367855072
          vf_loss: 0.010089122069378694
    num_agent_steps_sampled: 2066000
    num_agent_steps_trained: 2066000
    num_steps_sampled: 2066000
    num_steps_trained: 2066000
  iterations_since_restore: 2066
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2066,51835.4,2066000,-0.12,1,-7,391.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2067000
  custom_metrics: {}
  date: 2021-10-09_12-49-05
  done: false
  episode_len_mean: 392.34
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5805
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8499368283483717
          entropy_coeff: 0.009999999999999998
          kl: 0.010172536848498778
          policy_loss: -0.09647543782161341
          total_loss: -0.08884984958502981
          vf_explained_var: -0.5221285223960876
          vf_loss: 0.011057074181735516
    num_agent_steps_sampled: 2067000
    num_agent_steps_trained: 2067000
    num_steps_sampled: 2067000
    num_steps_trained: 2067000
  iterations_since_restore: 2067
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2067,51859.8,2067000,-0.12,1,-7,392.34


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2068000
  custom_metrics: {}
  date: 2021-10-09_12-49-30
  done: false
  episode_len_mean: 392.72
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5808
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.6560445825258892
          entropy_coeff: 0.009999999999999998
          kl: 0.01007558853324488
          policy_loss: -0.09940360456498133
          total_loss: -0.08132213003312548
          vf_explained_var: -0.353442907333374
          vf_loss: 0.019717639739004273
    num_agent_steps_sampled: 2068000
    num_agent_steps_trained: 2068000
    num_steps_sampled: 2068000
    num_steps_trained: 2068000
  iterations_since_restore: 2068
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2068,51884.5,2068000,-0.12,1,-7,392.72


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2069000
  custom_metrics: {}
  date: 2021-10-09_12-49-54
  done: false
  episode_len_mean: 391.74
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5810
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.963567034403483
          entropy_coeff: 0.009999999999999998
          kl: 0.00929521830478518
          policy_loss: -0.10898954028056727
          total_loss: -0.11149112838837835
          vf_explained_var: -1.0
          vf_loss: 0.003365711744926456
    num_agent_steps_sampled: 2069000
    num_agent_steps_trained: 2069000
    num_steps_sampled: 2069000
    num_steps_trained: 2069000
  iterations_since_restore: 2069
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2069,51908.7,2069000,-0.12,1,-7,391.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2070000
  custom_metrics: {}
  date: 2021-10-09_12-50-17
  done: false
  episode_len_mean: 392.73
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5813
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.7875955992274815
          entropy_coeff: 0.009999999999999998
          kl: 0.008088363240257617
          policy_loss: -0.06500090135054457
          total_loss: -0.06917142226464218
          vf_explained_var: -0.6772421002388
          vf_loss: 0.0017246984837887187
    num_agent_steps_sampled: 2070000
    num_agent_steps_trained: 2070000
    num_steps_sampled: 2070000
    num_steps_trained: 2070000
  iterations_since_restore: 2070
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2070,51931,2070000,-0.12,1,-7,392.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2071000
  custom_metrics: {}
  date: 2021-10-09_12-50-40
  done: false
  episode_len_mean: 393.37
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5815
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8859228094418843
          entropy_coeff: 0.009999999999999998
          kl: 0.010237975675472062
          policy_loss: -0.08600346657137076
          total_loss: -0.08700960131569041
          vf_explained_var: -0.8325341939926147
          vf_loss: 0.002688280606849326
    num_agent_steps_sampled: 2071000
    num_agent_steps_trained: 2071000
    num_steps_sampled: 2071000
    num_steps_trained: 2071000
  iterations_since_restore: 2071
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2071,51953.9,2071000,-0.12,1,-7,393.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2072000
  custom_metrics: {}
  date: 2021-10-09_12-51-04
  done: false
  episode_len_mean: 392.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5818
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8675539467069837
          entropy_coeff: 0.009999999999999998
          kl: 0.009369225983252131
          policy_loss: -0.12846455797553064
          total_loss: -0.1302849422312445
          vf_explained_var: -1.0
          vf_loss: 0.0029771638594360814
    num_agent_steps_sampled: 2072000
    num_agent_steps_trained: 2072000
    num_steps_sampled: 2072000
    num_steps_trained: 2072000
  iterations_since_restore: 2072
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2072,51978.7,2072000,-0.12,1,-7,392.93




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2073000
  custom_metrics: {}
  date: 2021-10-09_12-51-47
  done: false
  episode_len_mean: 392.22
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.15
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5821
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.762568175792694
          entropy_coeff: 0.009999999999999998
          kl: 0.006126539882777177
          policy_loss: -0.01789852794673708
          total_loss: 0.11538302227854728
          vf_explained_var: 0.032939448952674866
          vf_loss: 0.14183240328242797
    num_agent_steps_sampled: 2073000
    num_agent_steps_trained: 2073000
    num_steps_sampled: 2073000
    num_steps_trained: 2073000
  iterations_since_restore: 2073
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2073,52021.3,2073000,-0.15,1,-7,392.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2074000
  custom_metrics: {}
  date: 2021-10-09_12-52-09
  done: false
  episode_len_mean: 393.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.15
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5823
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.894090431266361
          entropy_coeff: 0.009999999999999998
          kl: 0.009602211332707533
          policy_loss: -0.15273512932989333
          total_loss: -0.14611425853023927
          vf_explained_var: -0.40470564365386963
          vf_loss: 0.011338679451081487
    num_agent_steps_sampled: 2074000
    num_agent_steps_trained: 2074000
    num_steps_sampled: 2074000
    num_steps_trained: 2074000
  iterations_since_restore: 2074
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2074,52043.2,2074000,-0.15,1,-7,393.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2075000
  custom_metrics: {}
  date: 2021-10-09_12-52-29
  done: false
  episode_len_mean: 394.82
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.13
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5825
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8744233846664429
          entropy_coeff: 0.009999999999999998
          kl: 0.0075225681488074025
          policy_loss: -0.132917161203093
          total_loss: -0.1105122008257442
          vf_explained_var: -0.6357598304748535
          vf_loss: 0.030006531160324812
    num_agent_steps_sampled: 2075000
    num_agent_steps_trained: 2075000
    num_steps_sampled: 2075000
    num_steps_trained: 2075000
  iterations_since_restore: 2075
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2075,52063.7,2075000,-0.13,2,-7,394.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2076000
  custom_metrics: {}
  date: 2021-10-09_12-52-51
  done: false
  episode_len_mean: 395.82
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.13
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5828
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.896483830610911
          entropy_coeff: 0.009999999999999998
          kl: 0.009329270984659358
          policy_loss: -0.10186015132607686
          total_loss: -0.10191538186950816
          vf_explained_var: -1.0
          vf_loss: 0.005090800985797412
    num_agent_steps_sampled: 2076000
    num_agent_steps_trained: 2076000
    num_steps_sampled: 2076000
    num_steps_trained: 2076000
  iterations_since_restore: 2076
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2076,52085.6,2076000,-0.13,2,-7,395.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2077000
  custom_metrics: {}
  date: 2021-10-09_12-53-15
  done: false
  episode_len_mean: 396.89
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.13
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5831
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8820715016788907
          entropy_coeff: 0.009999999999999998
          kl: 0.009117234578245103
          policy_loss: -0.06562939658761024
          total_loss: -0.06724242634243435
          vf_explained_var: -0.9434484839439392
          vf_loss: 0.0037029504077509046
    num_agent_steps_sampled: 2077000
    num_agent_steps_trained: 2077000
    num_steps_sampled: 2077000
    num_steps_trained: 2077000
  iterations_since_restore: 2077


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2077,52109.1,2077000,-0.13,2,-7,396.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2078000
  custom_metrics: {}
  date: 2021-10-09_12-53-37
  done: false
  episode_len_mean: 396.02
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.11
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5833
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.9485219015015496
          entropy_coeff: 0.009999999999999998
          kl: 0.010329840793443878
          policy_loss: -0.09954576858629784
          total_loss: -0.10109597608033154
          vf_explained_var: -0.7294145226478577
          vf_loss: 0.002634126633509166
    num_agent_steps_sampled: 2078000
    num_agent_steps_trained: 2078000
    num_steps_sampled: 2078000
    num_steps_trained: 2078000
  iterations_since_restore: 2078
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2078,52130.8,2078000,-0.11,2,-7,396.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2079000
  custom_metrics: {}
  date: 2021-10-09_12-54-02
  done: false
  episode_len_mean: 394.91
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.11
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5836
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.741090268558926
          entropy_coeff: 0.009999999999999998
          kl: 0.01080640729083191
          policy_loss: -0.10061574884586864
          total_loss: -0.09886922260953321
          vf_explained_var: -0.982255220413208
          vf_loss: 0.0031506399611114628
    num_agent_steps_sampled: 2079000
    num_agent_steps_trained: 2079000
    num_steps_sampled: 2079000
    num_steps_trained: 2079000
  iterations_since_restore: 2079
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2079,52156.4,2079000,-0.11,2,-7,394.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2080000
  custom_metrics: {}
  date: 2021-10-09_12-54-25
  done: false
  episode_len_mean: 396.28
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.11
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5839
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8742169049051074
          entropy_coeff: 0.009999999999999998
          kl: 0.009330136100985145
          policy_loss: -0.09681526310741902
          total_loss: -0.09982337272829479
          vf_explained_var: -0.9701645970344543
          vf_loss: 0.0019139654190641724
    num_agent_steps_sampled: 2080000
    num_agent_steps_trained: 2080000
    num_steps_sampled: 2080000
    num_steps_trained: 2080000
  iterations_since_restore: 2080


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2080,52179,2080000,-0.11,2,-7,396.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2081000
  custom_metrics: {}
  date: 2021-10-09_12-54-49
  done: false
  episode_len_mean: 395.35
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.11
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5841
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.581530707412296
          entropy_coeff: 0.009999999999999998
          kl: 0.00883202417644156
          policy_loss: -0.06770092290308741
          total_loss: -0.06838216746432914
          vf_explained_var: -0.5879070162773132
          vf_loss: 0.0020517905249208624
    num_agent_steps_sampled: 2081000
    num_agent_steps_trained: 2081000
    num_steps_sampled: 2081000
    num_steps_trained: 2081000
  iterations_since_restore: 2081
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2081,52202.9,2081000,-0.11,2,-7,395.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2082000
  custom_metrics: {}
  date: 2021-10-09_12-55-11
  done: false
  episode_len_mean: 394.3
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.11
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5844
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.925503937403361
          entropy_coeff: 0.009999999999999998
          kl: 0.010324108626555924
          policy_loss: -0.16104594332476457
          total_loss: -0.1630627035266823
          vf_explained_var: -0.9898029565811157
          vf_loss: 0.001945883685645337
    num_agent_steps_sampled: 2082000
    num_agent_steps_trained: 2082000
    num_steps_sampled: 2082000
    num_steps_trained: 2082000
  iterations_since_restore: 2082
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2082,52224.8,2082000,-0.11,2,-7,394.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2083000
  custom_metrics: {}
  date: 2021-10-09_12-55-33
  done: false
  episode_len_mean: 394.28
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.11
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5846
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8344590094354418
          entropy_coeff: 0.009999999999999998
          kl: 0.010394240674300835
          policy_loss: -0.09126045985354317
          total_loss: -0.09250157819026046
          vf_explained_var: -0.8001855611801147
          vf_loss: 0.001707196993003082
    num_agent_steps_sampled: 2083000
    num_agent_steps_trained: 2083000
    num_steps_sampled: 2083000
    num_steps_trained: 2083000
  iterations_since_restore: 2083
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2083,52246.6,2083000,-0.11,2,-7,394.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2084000
  custom_metrics: {}
  date: 2021-10-09_12-55-56
  done: false
  episode_len_mean: 393.31
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.11
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5849
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.9963778720961676
          entropy_coeff: 0.009999999999999998
          kl: 0.008678454756112607
          policy_loss: -0.04657152108848095
          total_loss: -0.051485989491144815
          vf_explained_var: -1.0
          vf_loss: 0.002194511023117229
    num_agent_steps_sampled: 2084000
    num_agent_steps_trained: 2084000
    num_steps_sampled: 2084000
    num_steps_trained: 2084000
  iterations_since_restore: 2084
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2084,52269.8,2084000,-0.11,2,-7,393.31




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2085000
  custom_metrics: {}
  date: 2021-10-09_12-56-34
  done: false
  episode_len_mean: 393.41
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.11
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5852
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.9240094025929768
          entropy_coeff: 0.009999999999999998
          kl: 0.010759780228065096
          policy_loss: -0.05328365129729112
          total_loss: -0.05542310637732347
          vf_explained_var: -0.7825785279273987
          vf_loss: 0.0011629141537317386
    num_agent_steps_sampled: 2085000
    num_agent_steps_trained: 2085000
    num_steps_sampled: 2085000
    num_steps_trained: 2085000
  iterations_since_restore: 2085


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2085,52308.3,2085000,-0.11,2,-7,393.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2086000
  custom_metrics: {}
  date: 2021-10-09_12-56-57
  done: false
  episode_len_mean: 392.06
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.14
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5854
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8264972103966608
          entropy_coeff: 0.009999999999999998
          kl: 0.007312874618570141
          policy_loss: -0.020783129003312854
          total_loss: 0.027900448772642347
          vf_explained_var: -0.452264666557312
          vf_loss: 0.05611648980735077
    num_agent_steps_sampled: 2086000
    num_agent_steps_trained: 2086000
    num_steps_sampled: 2086000
    num_steps_trained: 2086000
  iterations_since_restore: 2086
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2086,52331.1,2086000,-0.14,2,-7,392.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2087000
  custom_metrics: {}
  date: 2021-10-09_12-57-20
  done: false
  episode_len_mean: 390.94
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.14
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5857
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.9443266259299383
          entropy_coeff: 0.009999999999999998
          kl: 0.010458542410085335
          policy_loss: -0.09662580663959185
          total_loss: -0.0949357997212145
          vf_explained_var: -0.8561505079269409
          vf_loss: 0.00564175283200004
    num_agent_steps_sampled: 2087000
    num_agent_steps_trained: 2087000
    num_steps_sampled: 2087000
    num_steps_trained: 2087000
  iterations_since_restore: 2087
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2087,52353.6,2087000,-0.14,2,-7,390.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2088000
  custom_metrics: {}
  date: 2021-10-09_12-57-41
  done: false
  episode_len_mean: 391.5
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.14
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5859
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.9077740258640714
          entropy_coeff: 0.009999999999999998
          kl: 0.008757023662254011
          policy_loss: -0.07948820095302331
          total_loss: -0.08204121481006345
          vf_explained_var: -1.0
          vf_loss: 0.00355354867513395
    num_agent_steps_sampled: 2088000
    num_agent_steps_trained: 2088000
    num_steps_sampled: 2088000
    num_steps_trained: 2088000
  iterations_since_restore: 2088
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2088,52374.8,2088000,-0.14,2,-7,391.5


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2089000
  custom_metrics: {}
  date: 2021-10-09_12-58-01
  done: false
  episode_len_mean: 393.02
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.14
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5862
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.9502361920144824
          entropy_coeff: 0.009999999999999998
          kl: 0.00920649102224727
          policy_loss: -0.07374671080874072
          total_loss: -0.07574493577703834
          vf_explained_var: -0.9683363437652588
          vf_loss: 0.0038671925250026917
    num_agent_steps_sampled: 2089000
    num_agent_steps_trained: 2089000
    num_steps_sampled: 2089000
    num_steps_trained: 2089000
  iterations_since_restore: 2089
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2089,52395.3,2089000,-0.14,2,-7,393.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2090000
  custom_metrics: {}
  date: 2021-10-09_12-58-23
  done: false
  episode_len_mean: 392.66
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.14
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5864
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.48123128870617
          cur_lr: 5.000000000000001e-05
          entropy: 1.8783457928233676
          entropy_coeff: 0.009999999999999998
          kl: 0.0030611526031586876
          policy_loss: -0.227309213578701
          total_loss: -0.23969803071684306
          vf_explained_var: -0.9988806843757629
          vf_loss: 0.0018603645911854174
    num_agent_steps_sampled: 2090000
    num_agent_steps_trained: 2090000
    num_steps_sampled: 2090000
    num_steps_trained: 2090000
  iterations_since_restore: 2090
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2090,52416.9,2090000,-0.14,2,-7,392.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2091000
  custom_metrics: {}
  date: 2021-10-09_12-58-45
  done: false
  episode_len_mean: 392.83
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5867
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.8552773502137927
          entropy_coeff: 0.009999999999999998
          kl: 0.012333167920527108
          policy_loss: -0.02506825543112225
          total_loss: 0.03415809555186166
          vf_explained_var: -0.4598597288131714
          vf_loss: 0.06864498517631243
    num_agent_steps_sampled: 2091000
    num_agent_steps_trained: 2091000
    num_steps_sampled: 2091000
    num_steps_trained: 2091000
  iterations_since_restore: 2091
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2091,52438.6,2091000,-0.2,2,-7,392.83


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2092000
  custom_metrics: {}
  date: 2021-10-09_12-59-08
  done: false
  episode_len_mean: 392.65
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 5869
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.882580812772115
          entropy_coeff: 0.009999999999999998
          kl: 0.015306428279454248
          policy_loss: -0.10778206090132396
          total_loss: -0.10994423011110889
          vf_explained_var: -0.7980407476425171
          vf_loss: 0.005327458317495055
    num_agent_steps_sampled: 2092000
    num_agent_steps_trained: 2092000
    num_steps_sampled: 2092000
    num_steps_trained: 2092000
  iterations_since_restore: 2092
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2092,52462.1,2092000,-0.2,2,-7,392.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2093000
  custom_metrics: {}
  date: 2021-10-09_12-59-31
  done: false
  episode_len_mean: 392.09
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 5872
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.7947681612438626
          entropy_coeff: 0.009999999999999998
          kl: 0.015858805916943434
          policy_loss: -0.13810851408375635
          total_loss: -0.14107046706808937
          vf_explained_var: -0.8789255619049072
          vf_loss: 0.0032404499516511956
    num_agent_steps_sampled: 2093000
    num_agent_steps_trained: 2093000
    num_steps_sampled: 2093000
    num_steps_trained: 2093000
  iterations_since_restore: 2093


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2093,52484.5,2093000,-0.2,2,-7,392.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2094000
  custom_metrics: {}
  date: 2021-10-09_12-59-52
  done: false
  episode_len_mean: 391.28
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5874
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.7692480855517918
          entropy_coeff: 0.009999999999999998
          kl: 0.01924529020865371
          policy_loss: 0.013818264835410647
          total_loss: 0.09996329115496741
          vf_explained_var: -0.1409323662519455
          vf_loss: 0.08958414053906583
    num_agent_steps_sampled: 2094000
    num_agent_steps_trained: 2094000
    num_steps_sampled: 2094000
    num_steps_trained: 2094000
  iterations_since_restore: 2094
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2094,52506,2094000,-0.23,2,-10,391.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2095000
  custom_metrics: {}
  date: 2021-10-09_13-00-15
  done: false
  episode_len_mean: 391.76
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5877
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.6809568060768976
          entropy_coeff: 0.009999999999999998
          kl: 0.013278498104602911
          policy_loss: -0.07874801138208973
          total_loss: -0.07371629807684156
          vf_explained_var: 0.44174879789352417
          vf_loss: 0.012007015281253391
    num_agent_steps_sampled: 2095000
    num_agent_steps_trained: 2095000
    num_steps_sampled: 2095000
    num_steps_trained: 2095000
  iterations_since_restore: 2095

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2095,52529.2,2095000,-0.23,2,-10,391.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2096000
  custom_metrics: {}
  date: 2021-10-09_13-00-37
  done: false
  episode_len_mean: 392.48
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5879
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 2.0112911740938824
          entropy_coeff: 0.009999999999999998
          kl: 0.012742637549921573
          policy_loss: -0.07433250115977394
          total_loss: -0.080599652375612
          vf_explained_var: -0.782436192035675
          vf_loss: 0.004408364479119579
    num_agent_steps_sampled: 2096000
    num_agent_steps_trained: 2096000
    num_steps_sampled: 2096000
    num_steps_trained: 2096000
  iterations_since_restore: 2096
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2096,52550.4,2096000,-0.23,2,-10,392.48




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2097000
  custom_metrics: {}
  date: 2021-10-09_13-01-16
  done: false
  episode_len_mean: 392.49
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5882
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.83610204855601
          entropy_coeff: 0.009999999999999998
          kl: 0.013226544770946091
          policy_loss: -0.07664407996667756
          total_loss: -0.07979922872036696
          vf_explained_var: -0.2734384536743164
          vf_loss: 0.005410084759609567
    num_agent_steps_sampled: 2097000
    num_agent_steps_trained: 2097000
    num_steps_sampled: 2097000
    num_steps_trained: 2097000
  iterations_since_restore: 2097
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2097,52589.3,2097000,-0.23,2,-10,392.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2098000
  custom_metrics: {}
  date: 2021-10-09_13-01-39
  done: false
  episode_len_mean: 390.67
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5885
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.9750734739833409
          entropy_coeff: 0.009999999999999998
          kl: 0.01420556518585799
          policy_loss: -0.16743886892994245
          total_loss: -0.1744172866973612
          vf_explained_var: -0.977588415145874
          vf_loss: 0.002251455375355565
    num_agent_steps_sampled: 2098000
    num_agent_steps_trained: 2098000
    num_steps_sampled: 2098000
    num_steps_trained: 2098000
  iterations_since_restore: 2098
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2098,52612.4,2098000,-0.23,2,-10,390.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2099000
  custom_metrics: {}
  date: 2021-10-09_13-02-01
  done: false
  episode_len_mean: 390.41
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5887
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.8820848915312025
          entropy_coeff: 0.009999999999999998
          kl: 0.012297981373745041
          policy_loss: -0.08758174296882418
          total_loss: -0.09524208841224512
          vf_explained_var: -0.9985701441764832
          vf_loss: 0.00205242647530718
    num_agent_steps_sampled: 2099000
    num_agent_steps_trained: 2099000
    num_steps_sampled: 2099000
    num_steps_trained: 2099000
  iterations_since_restore: 2099


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2099,52635,2099000,-0.23,2,-10,390.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2100000
  custom_metrics: {}
  date: 2021-10-09_13-02-23
  done: false
  episode_len_mean: 391.62
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5890
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.891798018084632
          entropy_coeff: 0.009999999999999998
          kl: 0.010666877979935999
          policy_loss: -0.05979271626306905
          total_loss: -0.06692203304005993
          vf_explained_var: -0.3511582612991333
          vf_loss: 0.0038886076046360865
    num_agent_steps_sampled: 2100000
    num_agent_steps_trained: 2100000
    num_steps_sampled: 2100000
    num_steps_trained: 2100000
  iterations_since_restore: 2100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2100,52656.4,2100000,-0.23,2,-10,391.62


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2101000
  custom_metrics: {}
  date: 2021-10-09_13-02-45
  done: false
  episode_len_mean: 391.88
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5892
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.9125240471627978
          entropy_coeff: 0.009999999999999998
          kl: 0.013041941059062761
          policy_loss: -0.13051965923772918
          total_loss: -0.13853065706789494
          vf_explained_var: -0.9998860955238342
          vf_loss: 0.0014551757181632437
    num_agent_steps_sampled: 2101000
    num_agent_steps_trained: 2101000
    num_steps_sampled: 2101000
    num_steps_trained: 2101000
  iterations_since_restore: 210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2101,52678.8,2101000,-0.23,2,-10,391.88


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2102000
  custom_metrics: {}
  date: 2021-10-09_13-03-08
  done: false
  episode_len_mean: 390.44
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5895
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.671811756822798
          entropy_coeff: 0.009999999999999998
          kl: 0.011834161396817835
          policy_loss: -0.11297331456508901
          total_loss: -0.11948512577348286
          vf_explained_var: -0.19028732180595398
          vf_loss: 0.001441736578514489
    num_agent_steps_sampled: 2102000
    num_agent_steps_trained: 2102000
    num_steps_sampled: 2102000
    num_steps_trained: 2102000
  iterations_since_restore: 2102

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2102,52702.1,2102000,-0.23,2,-10,390.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2103000
  custom_metrics: {}
  date: 2021-10-09_13-03-26
  done: false
  episode_len_mean: 392.65
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.23
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5897
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.164964022528794
          entropy_coeff: 0.009999999999999998
          kl: 0.009834403456464871
          policy_loss: -0.05897869616746902
          total_loss: -0.062215286824438304
          vf_explained_var: 0.09661649167537689
          vf_loss: 0.0011295343327623818
    num_agent_steps_sampled: 2103000
    num_agent_steps_trained: 2103000
    num_steps_sampled: 2103000
    num_steps_trained: 2103000
  iterations_since_restore: 210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2103,52719.5,2103000,-0.23,2,-10,392.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2104000
  custom_metrics: {}
  date: 2021-10-09_13-03-48
  done: false
  episode_len_mean: 392.23
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.19
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5900
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.9390805708037482
          entropy_coeff: 0.009999999999999998
          kl: 0.013566003976791318
          policy_loss: -0.10484187526421415
          total_loss: -0.11290890559968021
          vf_explained_var: -0.9872089624404907
          vf_loss: 0.0012765818301381337
    num_agent_steps_sampled: 2104000
    num_agent_steps_trained: 2104000
    num_steps_sampled: 2104000
    num_steps_trained: 2104000
  iterations_since_restore: 210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2104,52742.1,2104000,-0.19,2,-10,392.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2105000
  custom_metrics: {}
  date: 2021-10-09_13-04-11
  done: false
  episode_len_mean: 391.73
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.19
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5902
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.8331742035018073
          entropy_coeff: 0.009999999999999998
          kl: 0.017038223410351567
          policy_loss: -0.0622299180055658
          total_loss: -0.06669220191737016
          vf_explained_var: -0.8690477013587952
          vf_loss: 0.0012506825939959123
    num_agent_steps_sampled: 2105000
    num_agent_steps_trained: 2105000
    num_steps_sampled: 2105000
    num_steps_trained: 2105000
  iterations_since_restore: 2105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2105,52764.8,2105000,-0.19,2,-10,391.73


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2106000
  custom_metrics: {}
  date: 2021-10-09_13-04-31
  done: false
  episode_len_mean: 392.45
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5905
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.740615644353085
          cur_lr: 5.000000000000001e-05
          entropy: 1.7900573319858974
          entropy_coeff: 0.009999999999999998
          kl: 0.004137931576360603
          policy_loss: -0.22511866291364033
          total_loss: -0.23863721473349464
          vf_explained_var: -1.0
          vf_loss: 0.0013174073123890493
    num_agent_steps_sampled: 2106000
    num_agent_steps_trained: 2106000
    num_steps_sampled: 2106000
    num_steps_trained: 2106000
  iterations_since_restore: 2106
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2106,52785,2106000,-0.22,2,-10,392.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2107000
  custom_metrics: {}
  date: 2021-10-09_13-04-53
  done: false
  episode_len_mean: 393.33
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5907
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 1.8245987521277534
          entropy_coeff: 0.009999999999999998
          kl: 0.019959106547499317
          policy_loss: -0.07642363285024961
          total_loss: -0.08588265830443965
          vf_explained_var: -0.9965765476226807
          vf_loss: 0.001395948912249878
    num_agent_steps_sampled: 2107000
    num_agent_steps_trained: 2107000
    num_steps_sampled: 2107000
    num_steps_trained: 2107000
  iterations_since_restore: 210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2107,52806.4,2107000,-0.22,2,-10,393.33




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2108000
  custom_metrics: {}
  date: 2021-10-09_13-05-32
  done: false
  episode_len_mean: 393.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5910
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 1.9339538137118022
          entropy_coeff: 0.009999999999999998
          kl: 0.0196001860432947
          policy_loss: -0.08301415528274245
          total_loss: -0.09366988423797819
          vf_explained_var: -0.8288359642028809
          vf_loss: 0.0014257039499676063
    num_agent_steps_sampled: 2108000
    num_agent_steps_trained: 2108000
    num_steps_sampled: 2108000
    num_steps_trained: 2108000
  iterations_since_restore: 2108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2108,52845.3,2108000,-0.22,2,-10,393.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2109000
  custom_metrics: {}
  date: 2021-10-09_13-05-56
  done: false
  episode_len_mean: 392.85
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5912
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 2.027232807212406
          entropy_coeff: 0.009999999999999998
          kl: 0.01869595588973601
          policy_loss: -0.13463932904932235
          total_loss: -0.14754204406506485
          vf_explained_var: -0.9183622002601624
          vf_loss: 0.0004463539416772417
    num_agent_steps_sampled: 2109000
    num_agent_steps_trained: 2109000
    num_steps_sampled: 2109000
    num_steps_trained: 2109000
  iterations_since_restore: 2109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2109,52869.4,2109000,-0.22,2,-10,392.85


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2110000
  custom_metrics: {}
  date: 2021-10-09_13-06-19
  done: false
  episode_len_mean: 391.57
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5915
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 1.8671827793121338
          entropy_coeff: 0.009999999999999998
          kl: 0.018859473302260277
          policy_loss: -0.08951396234333515
          total_loss: -0.10014367459548844
          vf_explained_var: -1.0
          vf_loss: 0.0010583014479683091
    num_agent_steps_sampled: 2110000
    num_agent_steps_trained: 2110000
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
  iterations_since_restore: 2110
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2110,52892.1,2110000,-0.22,2,-10,391.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2111000
  custom_metrics: {}
  date: 2021-10-09_13-06-40
  done: false
  episode_len_mean: 391.66
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5917
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 1.8839651518397862
          entropy_coeff: 0.009999999999999998
          kl: 0.007436611645050206
          policy_loss: -0.20191884206400978
          total_loss: -0.21750318772262997
          vf_explained_var: -1.0
          vf_loss: 0.0005014703757802232
    num_agent_steps_sampled: 2111000
    num_agent_steps_trained: 2111000
    num_steps_sampled: 2111000
    num_steps_trained: 2111000
  iterations_since_restore: 2111
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2111,52913,2111000,-0.22,2,-10,391.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2112000
  custom_metrics: {}
  date: 2021-10-09_13-07-02
  done: false
  episode_len_mean: 393.2
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5920
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 1.9941971752378675
          entropy_coeff: 0.009999999999999998
          kl: 0.02444442852411491
          policy_loss: -0.09217169369674391
          total_loss: -0.10255687930103806
          vf_explained_var: -0.7277765274047852
          vf_loss: 0.0005048243752551368
    num_agent_steps_sampled: 2112000
    num_agent_steps_trained: 2112000
    num_steps_sampled: 2112000
    num_steps_trained: 2112000
  iterations_since_restore: 2112


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2112,52935.6,2112000,-0.2,2,-10,393.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2113000
  custom_metrics: {}
  date: 2021-10-09_13-07-26
  done: false
  episode_len_mean: 391.95
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5923
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 1.8781581865416632
          entropy_coeff: 0.009999999999999998
          kl: 0.01843870308580509
          policy_loss: -0.06394856814295054
          total_loss: -0.07199251326835818
          vf_explained_var: -0.8903119564056396
          vf_loss: 0.0004956447292998848
    num_agent_steps_sampled: 2113000
    num_agent_steps_trained: 2113000
    num_steps_sampled: 2113000
    num_steps_trained: 2113000
  iterations_since_restore: 2113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2113,52959.1,2113000,-0.2,2,-10,391.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2114000
  custom_metrics: {}
  date: 2021-10-09_13-07-49
  done: false
  episode_len_mean: 391.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5925
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 1.6933400882614984
          entropy_coeff: 0.009999999999999998
          kl: 0.014335178592499156
          policy_loss: -0.13077944252226087
          total_loss: -0.13921726257022884
          vf_explained_var: -0.9851080179214478
          vf_loss: 0.0005329394402603309
    num_agent_steps_sampled: 2114000
    num_agent_steps_trained: 2114000
    num_steps_sampled: 2114000
    num_steps_trained: 2114000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2114,52982.2,2114000,-0.22,0,-10,391.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2115000
  custom_metrics: {}
  date: 2021-10-09_13-08-09
  done: false
  episode_len_mean: 391.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5928
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 1.859547946188185
          entropy_coeff: 0.009999999999999998
          kl: 0.014926259410447774
          policy_loss: -0.06226653183499972
          total_loss: -0.07172129323912992
          vf_explained_var: -0.6327885389328003
          vf_loss: 0.0008497536547818325
    num_agent_steps_sampled: 2115000
    num_agent_steps_trained: 2115000
    num_steps_sampled: 2115000
    num_steps_trained: 2115000
  iterations_since_restore: 211

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2115,53002.5,2115000,-0.22,0,-10,391.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2116000
  custom_metrics: {}
  date: 2021-10-09_13-08-31
  done: false
  episode_len_mean: 391.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5930
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 1.8901581870185005
          entropy_coeff: 0.009999999999999998
          kl: 0.01565727935972673
          policy_loss: -0.0785613099940949
          total_loss: -0.08825115485944682
          vf_explained_var: -0.77471524477005
          vf_loss: 0.0005147184689930226
    num_agent_steps_sampled: 2116000
    num_agent_steps_trained: 2116000
    num_steps_sampled: 2116000
    num_steps_trained: 2116000
  iterations_since_restore: 2116
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2116,53024.2,2116000,-0.22,0,-10,391.54


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2117000
  custom_metrics: {}
  date: 2021-10-09_13-08-51
  done: false
  episode_len_mean: 392.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5932
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 1.8076548907491896
          entropy_coeff: 0.009999999999999998
          kl: 0.02018734930750282
          policy_loss: -0.08373185638338328
          total_loss: -0.09014311428699229
          vf_explained_var: -0.9372026920318604
          vf_loss: 0.0004519892442557547
    num_agent_steps_sampled: 2117000
    num_agent_steps_trained: 2117000
    num_steps_sampled: 2117000
    num_steps_trained: 2117000
  iterations_since_restore: 211

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2117,53044.1,2117000,-0.22,0,-10,392.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2118000
  custom_metrics: {}
  date: 2021-10-09_13-09-13
  done: false
  episode_len_mean: 394.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5935
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8331925998972204
          cur_lr: 5.000000000000001e-05
          entropy: 1.8480560395452712
          entropy_coeff: 0.009999999999999998
          kl: 0.012099702453773927
          policy_loss: -0.04597724433988333
          total_loss: -0.054031876557403144
          vf_explained_var: -0.9560767412185669
          vf_loss: 0.00034454615152854887
    num_agent_steps_sampled: 2118000
    num_agent_steps_trained: 2118000
    num_steps_sampled: 2118000
    num_steps_trained: 2118000
  iterations_since_restore: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2118,53066.2,2118000,-0.22,0,-10,394.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2119000
  custom_metrics: {}
  date: 2021-10-09_13-09-35
  done: false
  episode_len_mean: 394.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5937
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8331925998972204
          cur_lr: 5.000000000000001e-05
          entropy: 1.9607969946331447
          entropy_coeff: 0.009999999999999998
          kl: 0.013032200009371525
          policy_loss: -0.042373804479009576
          total_loss: -0.05075990023712317
          vf_explained_var: -0.9616551995277405
          vf_loss: 0.00036354076485925664
    num_agent_steps_sampled: 2119000
    num_agent_steps_trained: 2119000
    num_steps_sampled: 2119000
    num_steps_trained: 2119000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2119,53088.3,2119000,-0.22,0,-10,394.31




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2120000
  custom_metrics: {}
  date: 2021-10-09_13-10-13
  done: false
  episode_len_mean: 394.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5940
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8331925998972204
          cur_lr: 5.000000000000001e-05
          entropy: 1.8925766203138563
          entropy_coeff: 0.009999999999999998
          kl: 0.013878089969279195
          policy_loss: -0.09908078131783339
          total_loss: -0.10604670602414343
          vf_explained_var: -0.9279637932777405
          vf_loss: 0.0003967147286554488
    num_agent_steps_sampled: 2120000
    num_agent_steps_trained: 2120000
    num_steps_sampled: 2120000
    num_steps_trained: 2120000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2120,53125.9,2120000,-0.22,0,-10,394.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2121000
  custom_metrics: {}
  date: 2021-10-09_13-10-37
  done: false
  episode_len_mean: 395.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5943
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8331925998972204
          cur_lr: 5.000000000000001e-05
          entropy: 1.7707218011220296
          entropy_coeff: 0.009999999999999998
          kl: 0.018450344697124116
          policy_loss: -0.1143579295112027
          total_loss: -0.11633967690997654
          vf_explained_var: -0.031238846480846405
          vf_loss: 0.0003527789640227436
    num_agent_steps_sampled: 2121000
    num_agent_steps_trained: 2121000
    num_steps_sampled: 2121000
    num_steps_trained: 2121000
  iterations_since_restore: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2121,53150.3,2121000,-0.22,0,-10,395.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2122000
  custom_metrics: {}
  date: 2021-10-09_13-10-59
  done: false
  episode_len_mean: 394.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5945
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8331925998972204
          cur_lr: 5.000000000000001e-05
          entropy: 2.0198078129026626
          entropy_coeff: 0.009999999999999998
          kl: 0.012141221083460134
          policy_loss: -0.1343994764611125
          total_loss: -0.144209530742632
          vf_explained_var: -0.998471736907959
          vf_loss: 0.0002720480938377376
    num_agent_steps_sampled: 2122000
    num_agent_steps_trained: 2122000
    num_steps_sampled: 2122000
    num_steps_trained: 2122000
  iterations_since_restore: 2122
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2122,53172.6,2122000,-0.22,0,-10,394.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2123000
  custom_metrics: {}
  date: 2021-10-09_13-11-20
  done: false
  episode_len_mean: 394.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5948
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8331925998972204
          cur_lr: 5.000000000000001e-05
          entropy: 1.631498212284512
          entropy_coeff: 0.009999999999999998
          kl: 0.011022026566264604
          policy_loss: -0.09542390226076046
          total_loss: -0.10230832720796267
          vf_explained_var: -0.4574069082736969
          vf_loss: 0.0002470857610913097
    num_agent_steps_sampled: 2123000
    num_agent_steps_trained: 2123000
    num_steps_sampled: 2123000
    num_steps_trained: 2123000
  iterations_since_restore: 212

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2123,53193.2,2123000,-0.22,0,-10,394.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2124000
  custom_metrics: {}
  date: 2021-10-09_13-11-42
  done: false
  episode_len_mean: 396.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5950
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8331925998972204
          cur_lr: 5.000000000000001e-05
          entropy: 1.8903453032175699
          entropy_coeff: 0.009999999999999998
          kl: 0.013429131717643135
          policy_loss: -0.16058624328838456
          total_loss: -0.16804272747702068
          vf_explained_var: -0.8318008184432983
          vf_loss: 0.0002579163350876317
    num_agent_steps_sampled: 2124000
    num_agent_steps_trained: 2124000
    num_steps_sampled: 2124000
    num_steps_trained: 2124000
  iterations_since_restore: 212

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2124,53215,2124000,-0.22,0,-10,396.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2125000
  custom_metrics: {}
  date: 2021-10-09_13-12-03
  done: false
  episode_len_mean: 395.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.27
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5953
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8331925998972204
          cur_lr: 5.000000000000001e-05
          entropy: 1.7641444590356614
          entropy_coeff: 0.009999999999999998
          kl: 0.0032514078249498662
          policy_loss: -0.20688833312855825
          total_loss: -0.22126356346739662
          vf_explained_var: -0.8058537840843201
          vf_loss: 0.0005571655841777101
    num_agent_steps_sampled: 2125000
    num_agent_steps_trained: 2125000
    num_steps_sampled: 2125000
    num_steps_trained: 2125000
  iterations_since_restore: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2125,53235.7,2125000,-0.27,0,-10,395.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2126000
  custom_metrics: {}
  date: 2021-10-09_13-12-24
  done: false
  episode_len_mean: 397.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.24
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5955
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.8206394924057854
          entropy_coeff: 0.009999999999999998
          kl: 0.019638935648209198
          policy_loss: -0.10712951807719139
          total_loss: -0.11676956021951304
          vf_explained_var: -0.7270963788032532
          vf_loss: 0.00038484155520563946
    num_agent_steps_sampled: 2126000
    num_agent_steps_trained: 2126000
    num_steps_sampled: 2126000
    num_steps_trained: 2126000
  iterations_since_restore: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2126,53257.4,2126000,-0.24,0,-10,397.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2127000
  custom_metrics: {}
  date: 2021-10-09_13-12-47
  done: false
  episode_len_mean: 396.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.24
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5958
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.7612851871384514
          entropy_coeff: 0.009999999999999998
          kl: 0.02551038550834025
          policy_loss: -0.05809625991516643
          total_loss: -0.06483442104525036
          vf_explained_var: -0.21081320941448212
          vf_loss: 0.0002471573319780873
    num_agent_steps_sampled: 2127000
    num_agent_steps_trained: 2127000
    num_steps_sampled: 2127000
    num_steps_trained: 2127000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2127,53280.4,2127000,-0.24,0,-10,396.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2128000
  custom_metrics: {}
  date: 2021-10-09_13-13-11
  done: false
  episode_len_mean: 394.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.24
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5961
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.889332648118337
          entropy_coeff: 0.009999999999999998
          kl: 0.015266913680267861
          policy_loss: -0.06802652049809695
          total_loss: -0.07720879132135047
          vf_explained_var: -0.7603703737258911
          vf_loss: 0.00017084364210151964
    num_agent_steps_sampled: 2128000
    num_agent_steps_trained: 2128000
    num_steps_sampled: 2128000
    num_steps_trained: 2128000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2128,53304.1,2128000,-0.24,0,-10,394.79


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2129000
  custom_metrics: {}
  date: 2021-10-09_13-13-31
  done: false
  episode_len_mean: 395.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.24
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5963
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.7056483493910894
          entropy_coeff: 0.009999999999999998
          kl: 0.01975670079488597
          policy_loss: -0.04687607406328122
          total_loss: -0.0513612721943193
          vf_explained_var: -0.9144331216812134
          vf_loss: 0.00022543543115413438
    num_agent_steps_sampled: 2129000
    num_agent_steps_trained: 2129000
    num_steps_sampled: 2129000
    num_steps_trained: 2129000
  iterations_since_restore: 212

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2129,53324.1,2129000,-0.24,0,-10,395.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2130000
  custom_metrics: {}
  date: 2021-10-09_13-13-52
  done: false
  episode_len_mean: 395.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5965
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.7779518829451666
          entropy_coeff: 0.009999999999999998
          kl: 0.01600657167304477
          policy_loss: -0.1016530481684539
          total_loss: -0.10922413286235598
          vf_explained_var: -0.5767924785614014
          vf_loss: 0.00020601577691397525
    num_agent_steps_sampled: 2130000
    num_agent_steps_trained: 2130000
    num_steps_sampled: 2130000
    num_steps_trained: 2130000
  iterations_since_restore: 213

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2130,53344.8,2130000,-0.19,0,-10,395.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2131000
  custom_metrics: {}
  date: 2021-10-09_13-14-13
  done: false
  episode_len_mean: 395.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5968
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.9638240999645658
          entropy_coeff: 0.009999999999999998
          kl: 0.014086564481867134
          policy_loss: -0.12404989769889249
          total_loss: -0.13473483191596136
          vf_explained_var: -1.0
          vf_loss: 0.00015069177041166567
    num_agent_steps_sampled: 2131000
    num_agent_steps_trained: 2131000
    num_steps_sampled: 2131000
    num_steps_trained: 2131000
  iterations_since_restore: 2131
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2131,53366.1,2131000,-0.19,0,-10,395.51




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2132000
  custom_metrics: {}
  date: 2021-10-09_13-14-51
  done: false
  episode_len_mean: 396.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 5970
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.9459199799431695
          entropy_coeff: 0.009999999999999998
          kl: 0.013010386944734541
          policy_loss: -0.14870059630937046
          total_loss: -0.15987470601167944
          vf_explained_var: -1.0
          vf_loss: 0.00015496917064107644
    num_agent_steps_sampled: 2132000
    num_agent_steps_trained: 2132000
    num_steps_sampled: 2132000
    num_steps_trained: 2132000
  iterations_since_restore: 2132
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2132,53403.6,2132000,-0.19,0,-10,396.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2133000
  custom_metrics: {}
  date: 2021-10-09_13-15-13
  done: false
  episode_len_mean: 396.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 5973
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.8971900555822585
          entropy_coeff: 0.009999999999999998
          kl: 0.01605315877334586
          policy_loss: -0.08184913028445509
          total_loss: -0.09069328251191311
          vf_explained_var: -0.41503867506980896
          vf_loss: 9.621928232566764e-05
    num_agent_steps_sampled: 2133000
    num_agent_steps_trained: 2133000
    num_steps_sampled: 2133000
    num_steps_trained: 2133000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2133,53425.8,2133000,-0.19,0,-10,396.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2134000
  custom_metrics: {}
  date: 2021-10-09_13-15-34
  done: false
  episode_len_mean: 395.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 5975
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.8600849390029908
          entropy_coeff: 0.009999999999999998
          kl: 0.017493740555324305
          policy_loss: -0.11272748801857232
          total_loss: -0.12025686694929998
          vf_explained_var: -0.9011121392250061
          vf_loss: 0.000139728168627092
    num_agent_steps_sampled: 2134000
    num_agent_steps_trained: 2134000
    num_steps_sampled: 2134000
    num_steps_trained: 2134000
  iterations_since_restore: 2134

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2134,53447.2,2134000,-0.09,0,-5,395.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2135000
  custom_metrics: {}
  date: 2021-10-09_13-15-57
  done: false
  episode_len_mean: 395.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 5978
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.861481938097212
          entropy_coeff: 0.009999999999999998
          kl: 0.014505052257189332
          policy_loss: -0.10321858960928189
          total_loss: -0.11266288571059704
          vf_explained_var: -0.8829785585403442
          vf_loss: 0.00010639627313745829
    num_agent_steps_sampled: 2135000
    num_agent_steps_trained: 2135000
    num_steps_sampled: 2135000
    num_steps_trained: 2135000
  iterations_since_restore: 2135

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2135,53469.9,2135000,-0.09,0,-5,395.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2136000
  custom_metrics: {}
  date: 2021-10-09_13-16-20
  done: false
  episode_len_mean: 395.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 5981
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.9045526557498509
          entropy_coeff: 0.009999999999999998
          kl: 0.015113009763800105
          policy_loss: -0.0886901686175002
          total_loss: -0.09813536598036686
          vf_explained_var: -0.8699511885643005
          vf_loss: 0.0001562927991067732
    num_agent_steps_sampled: 2136000
    num_agent_steps_trained: 2136000
    num_steps_sampled: 2136000
    num_steps_trained: 2136000
  iterations_since_restore: 2136

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2136,53492.5,2136000,-0.09,0,-5,395.39


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2137000
  custom_metrics: {}
  date: 2021-10-09_13-16-39
  done: false
  episode_len_mean: 396.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 5983
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.8353784852557713
          entropy_coeff: 0.009999999999999998
          kl: 0.015570549346783065
          policy_loss: -0.10330586685902543
          total_loss: -0.11179282563842005
          vf_explained_var: -0.8584817051887512
          vf_loss: 0.00013687578442234856
    num_agent_steps_sampled: 2137000
    num_agent_steps_trained: 2137000
    num_steps_sampled: 2137000
    num_steps_trained: 2137000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2137,53512.2,2137000,-0.09,0,-5,396.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2138000
  custom_metrics: {}
  date: 2021-10-09_13-17-00
  done: false
  episode_len_mean: 397.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 5985
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.5108586132526398
          entropy_coeff: 0.009999999999999998
          kl: 0.013875561766617552
          policy_loss: -0.03391559422016144
          total_loss: -0.04014916523463196
          vf_explained_var: -0.3530907928943634
          vf_loss: 0.00020425263396140912
    num_agent_steps_sampled: 2138000
    num_agent_steps_trained: 2138000
    num_steps_sampled: 2138000
    num_steps_trained: 2138000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2138,53532.6,2138000,-0.09,0,-5,397.63


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2139000
  custom_metrics: {}
  date: 2021-10-09_13-17-26
  done: false
  episode_len_mean: 395.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 5988
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.634116820494334
          entropy_coeff: 0.009999999999999998
          kl: 0.013186588173022429
          policy_loss: -0.1624185088608
          total_loss: -0.1704411072863473
          vf_explained_var: -0.8922765254974365
          vf_loss: 7.834588044109599e-05
    num_agent_steps_sampled: 2139000
    num_agent_steps_trained: 2139000
    num_steps_sampled: 2139000
    num_steps_trained: 2139000
  iterations_since_restore: 2139
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2139,53558.4,2139000,-0.09,0,-5,395.56


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2140000
  custom_metrics: {}
  date: 2021-10-09_13-17-49
  done: false
  episode_len_mean: 394.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 5991
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.916450916396247
          entropy_coeff: 0.009999999999999998
          kl: 0.013271560894475747
          policy_loss: -0.1361634560343292
          total_loss: -0.14695323656002682
          vf_explained_var: -0.9993160963058472
          vf_loss: 8.140391174593889e-05
    num_agent_steps_sampled: 2140000
    num_agent_steps_trained: 2140000
    num_steps_sampled: 2140000
    num_steps_trained: 2140000
  iterations_since_restore: 2140


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2140,53582,2140000,-0.09,0,-5,394.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2141000
  custom_metrics: {}
  date: 2021-10-09_13-18-13
  done: false
  episode_len_mean: 393.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 5994
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.8709433158238729
          entropy_coeff: 0.009999999999999998
          kl: 0.016159023432978693
          policy_loss: -0.15581492889258597
          total_loss: -0.16433532271120282
          vf_explained_var: -0.1882660984992981
          vf_loss: 9.13548079147909e-05
    num_agent_steps_sampled: 2141000
    num_agent_steps_trained: 2141000
    num_steps_sampled: 2141000
    num_steps_trained: 2141000
  iterations_since_restore: 2141

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2141,53606,2141000,-0.09,0,-5,393.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2142000
  custom_metrics: {}
  date: 2021-10-09_13-18-36
  done: false
  episode_len_mean: 391.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 5997
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.8245736307568021
          entropy_coeff: 0.009999999999999998
          kl: 0.016212386682787995
          policy_loss: -0.06331508534236087
          total_loss: -0.07124932469386193
          vf_explained_var: -0.5445352792739868
          vf_loss: 0.00018046566070147998
    num_agent_steps_sampled: 2142000
    num_agent_steps_trained: 2142000
    num_steps_sampled: 2142000
    num_steps_trained: 2142000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2142,53629.3,2142000,-0.09,0,-5,391.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2143000
  custom_metrics: {}
  date: 2021-10-09_13-18-59
  done: false
  episode_len_mean: 391.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 5999
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.4837500307295057
          entropy_coeff: 0.009999999999999998
          kl: 0.010345052727669134
          policy_loss: -0.10977653960386911
          total_loss: -0.11805019229650497
          vf_explained_var: -0.38767868280410767
          vf_loss: 9.928213569057536e-05
    num_agent_steps_sampled: 2143000
    num_agent_steps_trained: 2143000
    num_steps_sampled: 2143000
    num_steps_trained: 2143000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2143,53651.4,2143000,-0.09,0,-5,391.56




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2144000
  custom_metrics: {}
  date: 2021-10-09_13-19-40
  done: false
  episode_len_mean: 391.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6002
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.78495735194948
          entropy_coeff: 0.009999999999999998
          kl: 0.01843121052320791
          policy_loss: -0.06444050261957777
          total_loss: -0.07063395681066645
          vf_explained_var: -0.6009892821311951
          vf_loss: 0.00013855672368663364
    num_agent_steps_sampled: 2144000
    num_agent_steps_trained: 2144000
    num_steps_sampled: 2144000
    num_steps_trained: 2144000
  iterations_since_restore: 2144


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2144,53693.1,2144000,-0.09,0,-5,391.35


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2145000
  custom_metrics: {}
  date: 2021-10-09_13-20-00
  done: false
  episode_len_mean: 390.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 6004
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.6905365652508206
          entropy_coeff: 0.009999999999999998
          kl: 0.024834986126055933
          policy_loss: -0.08618103310258853
          total_loss: -0.08749135699537065
          vf_explained_var: -0.4178604483604431
          vf_loss: 7.579521136196693e-05
    num_agent_steps_sampled: 2145000
    num_agent_steps_trained: 2145000
    num_steps_sampled: 2145000
    num_steps_trained: 2145000
  iterations_since_restore: 214

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2145,53713,2145000,-0.09,0,-5,390.74


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2146000
  custom_metrics: {}
  date: 2021-10-09_13-20-22
  done: false
  episode_len_mean: 390.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6007
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7040308833122253
          entropy_coeff: 0.009999999999999998
          kl: 0.010580076677751002
          policy_loss: -0.09443816161817975
          total_loss: -0.10148090463545588
          vf_explained_var: -0.9477608799934387
          vf_loss: 8.041992081820758e-05
    num_agent_steps_sampled: 2146000
    num_agent_steps_trained: 2146000
    num_steps_sampled: 2146000
    num_steps_trained: 2146000
  iterations_since_restore: 214

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2146,53734.9,2146000,-0.06,0,-5,390.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2147000
  custom_metrics: {}
  date: 2021-10-09_13-20-47
  done: false
  episode_len_mean: 389.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6010
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.663162652651469
          entropy_coeff: 0.009999999999999998
          kl: 0.013477860767522444
          policy_loss: -0.08607588567150136
          total_loss: -0.08991248188540339
          vf_explained_var: -0.6093373894691467
          vf_loss: 0.00016167189060070086
    num_agent_steps_sampled: 2147000
    num_agent_steps_trained: 2147000
    num_steps_sampled: 2147000
    num_steps_trained: 2147000
  iterations_since_restore: 2147

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2147,53759.3,2147000,-0.06,0,-5,389.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2148000
  custom_metrics: {}
  date: 2021-10-09_13-21-09
  done: false
  episode_len_mean: 389.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 6012
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7397355318069458
          entropy_coeff: 0.009999999999999998
          kl: 0.010037706404110718
          policy_loss: -0.06927279067329234
          total_loss: -0.0771780281017224
          vf_explained_var: -0.9181080460548401
          vf_loss: 8.335482264859214e-05
    num_agent_steps_sampled: 2148000
    num_agent_steps_trained: 2148000
    num_steps_sampled: 2148000
    num_steps_trained: 2148000
  iterations_since_restore: 2148

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2148,53782,2148000,-0.06,0,-5,389.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2149000
  custom_metrics: {}
  date: 2021-10-09_13-21-33
  done: false
  episode_len_mean: 389.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6015
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.6493771182166206
          entropy_coeff: 0.009999999999999998
          kl: 0.01003992657492662
          policy_loss: -0.08440259208695756
          total_loss: -0.09140275539830327
          vf_explained_var: -0.6830364465713501
          vf_loss: 8.276604177000182e-05
    num_agent_steps_sampled: 2149000
    num_agent_steps_trained: 2149000
    num_steps_sampled: 2149000
    num_steps_trained: 2149000
  iterations_since_restore: 2149

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2149,53805.3,2149000,-0.06,0,-5,389.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2150000
  custom_metrics: {}
  date: 2021-10-09_13-21-56
  done: false
  episode_len_mean: 387.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6018
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.665880032380422
          entropy_coeff: 0.009999999999999998
          kl: 0.013136966206435199
          policy_loss: -0.018247370421886445
          total_loss: 0.04194482250346078
          vf_explained_var: -0.4548601806163788
          vf_loss: 0.0645371683459315
    num_agent_steps_sampled: 2150000
    num_agent_steps_trained: 2150000
    num_steps_sampled: 2150000
    num_steps_trained: 2150000
  iterations_since_restore: 2150
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2150,53829.1,2150000,-0.09,0,-5,387.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2151000
  custom_metrics: {}
  date: 2021-10-09_13-22-20
  done: false
  episode_len_mean: 387.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6021
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.4840517772568598
          entropy_coeff: 0.009999999999999998
          kl: 0.01033914421862815
          policy_loss: -0.08700599256489013
          total_loss: -0.08705387173427476
          vf_explained_var: 0.06945526599884033
          vf_loss: 0.005101327604562458
    num_agent_steps_sampled: 2151000
    num_agent_steps_trained: 2151000
    num_steps_sampled: 2151000
    num_steps_trained: 2151000
  iterations_since_restore: 2151


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2151,53852.6,2151000,-0.09,0,-5,387.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2152000
  custom_metrics: {}
  date: 2021-10-09_13-22-42
  done: false
  episode_len_mean: 387.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 6023
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7618244489034016
          entropy_coeff: 0.009999999999999998
          kl: 0.014246563125101414
          policy_loss: -0.1620866912520594
          total_loss: -0.1618864008742902
          vf_explained_var: -0.13177868723869324
          vf_loss: 0.004464641470824265
    num_agent_steps_sampled: 2152000
    num_agent_steps_trained: 2152000
    num_steps_sampled: 2152000
    num_steps_trained: 2152000
  iterations_since_restore: 2152


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2152,53874.9,2152000,-0.09,0,-5,387.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2153000
  custom_metrics: {}
  date: 2021-10-09_13-23-06
  done: false
  episode_len_mean: 386.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6026
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.585933596558041
          entropy_coeff: 0.009999999999999998
          kl: 0.010135155454506004
          policy_loss: -0.09787133594767915
          total_loss: -0.10213464152895742
          vf_explained_var: 0.10495000332593918
          vf_loss: 0.0020959275444814313
    num_agent_steps_sampled: 2153000
    num_agent_steps_trained: 2153000
    num_steps_sampled: 2153000
    num_steps_trained: 2153000
  iterations_since_restore: 2153

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2153,53898.3,2153000,-0.09,0,-5,386.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2154000
  custom_metrics: {}
  date: 2021-10-09_13-23-30
  done: false
  episode_len_mean: 384.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6029
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.739827693833245
          entropy_coeff: 0.009999999999999998
          kl: 0.011189490542661515
          policy_loss: -0.12141957134008408
          total_loss: -0.1266970282420516
          vf_explained_var: -0.617257833480835
          vf_loss: 0.0016324436518415394
    num_agent_steps_sampled: 2154000
    num_agent_steps_trained: 2154000
    num_steps_sampled: 2154000
    num_steps_trained: 2154000
  iterations_since_restore: 2154
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2154,53923,2154000,-0.09,0,-5,384.33




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2155000
  custom_metrics: {}
  date: 2021-10-09_13-24-10
  done: false
  episode_len_mean: 382.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6032
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.8473575843705072
          entropy_coeff: 0.009999999999999998
          kl: 0.014253958661065309
          policy_loss: -0.05887186800440152
          total_loss: -0.0629970784402556
          vf_explained_var: -0.938714325428009
          vf_loss: 0.0009875339543214067
    num_agent_steps_sampled: 2155000
    num_agent_steps_trained: 2155000
    num_steps_sampled: 2155000
    num_steps_trained: 2155000
  iterations_since_restore: 2155


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2155,53962.9,2155000,-0.09,0,-5,382.09


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2156000
  custom_metrics: {}
  date: 2021-10-09_13-24-35
  done: false
  episode_len_mean: 380.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6035
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7080528789096407
          entropy_coeff: 0.009999999999999998
          kl: 0.015441349637476126
          policy_loss: -0.08690517250862387
          total_loss: -0.08851555788682566
          vf_explained_var: -0.30708807706832886
          vf_loss: 0.000996320503893205
    num_agent_steps_sampled: 2156000
    num_agent_steps_trained: 2156000
    num_steps_sampled: 2156000
    num_steps_trained: 2156000
  iterations_since_restore: 2156

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2156,53987.2,2156000,-0.09,0,-5,380.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2157000
  custom_metrics: {}
  date: 2021-10-09_13-24-58
  done: false
  episode_len_mean: 379.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6038
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7636226812998455
          entropy_coeff: 0.009999999999999998
          kl: 0.012207335356638281
          policy_loss: -0.10806763018998834
          total_loss: -0.11324062815143002
          vf_explained_var: -0.6663092970848083
          vf_loss: 0.0010207820237458994
    num_agent_steps_sampled: 2157000
    num_agent_steps_trained: 2157000
    num_steps_sampled: 2157000
    num_steps_trained: 2157000
  iterations_since_restore: 215

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2157,54010.9,2157000,-0.09,0,-5,379.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2158000
  custom_metrics: {}
  date: 2021-10-09_13-25-23
  done: false
  episode_len_mean: 378.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6041
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7761007295714484
          entropy_coeff: 0.009999999999999998
          kl: 0.014258237667844674
          policy_loss: -0.03561461069103744
          total_loss: -0.039408570693598856
          vf_explained_var: -0.8135502934455872
          vf_loss: 0.0006022050132742151
    num_agent_steps_sampled: 2158000
    num_agent_steps_trained: 2158000
    num_steps_sampled: 2158000
    num_steps_trained: 2158000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2158,54035.8,2158000,-0.09,0,-5,378.06


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2159000
  custom_metrics: {}
  date: 2021-10-09_13-25-42
  done: false
  episode_len_mean: 379.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 6043
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7296983467208014
          entropy_coeff: 0.009999999999999998
          kl: 0.012203909398363886
          policy_loss: -0.0012729542950789133
          total_loss: -0.00661716436346372
          vf_explained_var: -0.8113346695899963
          vf_loss: 0.0005135393502617565
    num_agent_steps_sampled: 2159000
    num_agent_steps_trained: 2159000
    num_steps_sampled: 2159000
    num_steps_trained: 2159000
  iterations_since_restore: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2159,54055,2159000,-0.09,0,-5,379.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2160000
  custom_metrics: {}
  date: 2021-10-09_13-26-08
  done: false
  episode_len_mean: 377.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6046
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.8424514691034952
          entropy_coeff: 0.009999999999999998
          kl: 0.013187558781230182
          policy_loss: -0.04152296901577049
          total_loss: -0.0471911801232232
          vf_explained_var: -0.8425740599632263
          vf_loss: 0.0003950513103821625
    num_agent_steps_sampled: 2160000
    num_agent_steps_trained: 2160000
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
  iterations_since_restore: 2160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2160,54080.1,2160000,-0.09,0,-5,377.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2161000
  custom_metrics: {}
  date: 2021-10-09_13-26-28
  done: false
  episode_len_mean: 378.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 6048
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.8196120447582669
          entropy_coeff: 0.009999999999999998
          kl: 0.012424835181354905
          policy_loss: -0.12378900556100739
          total_loss: -0.1300565744853682
          vf_explained_var: -0.7782040238380432
          vf_loss: 0.0002822345070954826
    num_agent_steps_sampled: 2161000
    num_agent_steps_trained: 2161000
    num_steps_sampled: 2161000
    num_steps_trained: 2161000
  iterations_since_restore: 2161

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2161,54100.7,2161000,-0.09,0,-5,378.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2162000
  custom_metrics: {}
  date: 2021-10-09_13-26-53
  done: false
  episode_len_mean: 375.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 6051
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.6005877017974854
          entropy_coeff: 0.009999999999999998
          kl: 0.01451330797528165
          policy_loss: -0.10359400254156854
          total_loss: -0.10567177219523324
          vf_explained_var: -0.2505635917186737
          vf_loss: 0.00032417693372634756
    num_agent_steps_sampled: 2162000
    num_agent_steps_trained: 2162000
    num_steps_sampled: 2162000
    num_steps_trained: 2162000
  iterations_since_restore: 216

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2162,54125.5,2162000,-0.09,0,-5,375.81


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2163000
  custom_metrics: {}
  date: 2021-10-09_13-27-14
  done: false
  episode_len_mean: 376.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6054
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.696975760989719
          entropy_coeff: 0.009999999999999998
          kl: 0.009239655662271041
          policy_loss: -0.0541917870235112
          total_loss: -0.06205184718386995
          vf_explained_var: -0.6934876441955566
          vf_loss: 0.00044898385224061913
    num_agent_steps_sampled: 2163000
    num_agent_steps_trained: 2163000
    num_steps_sampled: 2163000
    num_steps_trained: 2163000
  iterations_since_restore: 2163

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2163,54146.3,2163000,-0.04,0,-4,376.04


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2164000
  custom_metrics: {}
  date: 2021-10-09_13-27-36
  done: false
  episode_len_mean: 376.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6056
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.5353895505269368
          entropy_coeff: 0.009999999999999998
          kl: 0.012503048434011196
          policy_loss: -0.08942564140177435
          total_loss: -0.09267042889777156
          vf_explained_var: -0.5573257803916931
          vf_loss: 0.00038947758099917943
    num_agent_steps_sampled: 2164000
    num_agent_steps_trained: 2164000
    num_steps_sampled: 2164000
    num_steps_trained: 2164000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2164,54168.8,2164000,-0.04,0,-4,376.22


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2165000
  custom_metrics: {}
  date: 2021-10-09_13-28-01
  done: false
  episode_len_mean: 375.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6059
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.6237737271520827
          entropy_coeff: 0.009999999999999998
          kl: 0.014421283271579731
          policy_loss: -0.0817254135178195
          total_loss: -0.08410687450216048
          vf_explained_var: -0.5344686508178711
          vf_loss: 0.00033860445611127136
    num_agent_steps_sampled: 2165000
    num_agent_steps_trained: 2165000
    num_steps_sampled: 2165000
    num_steps_trained: 2165000
  iterations_since_restore: 216

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2165,54193.6,2165000,-0.04,0,-4,375.08




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2166000
  custom_metrics: {}
  date: 2021-10-09_13-28-47
  done: false
  episode_len_mean: 370.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 4
  episodes_total: 6063
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.3910497804482778
          entropy_coeff: 0.009999999999999998
          kl: 0.012513722425263198
          policy_loss: -0.03197803412460619
          total_loss: -0.033951509578360455
          vf_explained_var: 0.07142936438322067
          vf_loss: 0.00020738528764923102
    num_agent_steps_sampled: 2166000
    num_agent_steps_trained: 2166000
    num_steps_sampled: 2166000
    num_steps_trained: 2166000
  iterations_since_restore: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2166,54239.7,2166000,-0.04,0,-4,370.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2167000
  custom_metrics: {}
  date: 2021-10-09_13-29-09
  done: false
  episode_len_mean: 370.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6065
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.750488864050971
          entropy_coeff: 0.009999999999999998
          kl: 0.014365548185217531
          policy_loss: -0.06603601361728377
          total_loss: -0.06986451353877783
          vf_explained_var: -0.8771597146987915
          vf_loss: 0.0002109599623913204
    num_agent_steps_sampled: 2167000
    num_agent_steps_trained: 2167000
    num_steps_sampled: 2167000
    num_steps_trained: 2167000
  iterations_since_restore: 2167

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2167,54261.6,2167000,-0.04,0,-4,370.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2168000
  custom_metrics: {}
  date: 2021-10-09_13-29-31
  done: false
  episode_len_mean: 370.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6068
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.778120564089881
          entropy_coeff: 0.009999999999999998
          kl: 0.010378538642062443
          policy_loss: 0.0040195988284216985
          total_loss: -0.0037876884556478922
          vf_explained_var: -0.14367559552192688
          vf_loss: 0.0002456780023445996
    num_agent_steps_sampled: 2168000
    num_agent_steps_trained: 2168000
    num_steps_sampled: 2168000
    num_steps_trained: 2168000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2168,54283.2,2168000,-0.04,0,-4,370.03


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2169000
  custom_metrics: {}
  date: 2021-10-09_13-29-55
  done: false
  episode_len_mean: 368.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6071
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.843873053126865
          entropy_coeff: 0.009999999999999998
          kl: 0.013125622382043003
          policy_loss: -0.06810976174763507
          total_loss: -0.07412155763142639
          vf_explained_var: -0.22724446654319763
          vf_loss: 0.00012374160412744256
    num_agent_steps_sampled: 2169000
    num_agent_steps_trained: 2169000
    num_steps_sampled: 2169000
    num_steps_trained: 2169000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2169,54307.6,2169000,-0.04,0,-4,368.52


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2170000
  custom_metrics: {}
  date: 2021-10-09_13-30-18
  done: false
  episode_len_mean: 369.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6073
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7297341876559786
          entropy_coeff: 0.009999999999999998
          kl: 0.012271701855233077
          policy_loss: -0.1304426038844718
          total_loss: -0.13594236750569608
          vf_explained_var: -0.4140307605266571
          vf_loss: 0.00029480253853964516
    num_agent_steps_sampled: 2170000
    num_agent_steps_trained: 2170000
    num_steps_sampled: 2170000
    num_steps_trained: 2170000
  iterations_since_restore: 217

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2170,54330.3,2170000,-0.04,0,-4,369.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2171000
  custom_metrics: {}
  date: 2021-10-09_13-30-38
  done: false
  episode_len_mean: 369.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6075
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7586540513568454
          entropy_coeff: 0.009999999999999998
          kl: 0.008518362921707812
          policy_loss: -0.06730738725099299
          total_loss: -0.07682734599543942
          vf_explained_var: -0.43870043754577637
          vf_loss: 8.196642633669802e-05
    num_agent_steps_sampled: 2171000
    num_agent_steps_trained: 2171000
    num_steps_sampled: 2171000
    num_steps_trained: 2171000
  iterations_since_restore: 217

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2171,54350.1,2171000,-0.04,0,-4,369.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2172000
  custom_metrics: {}
  date: 2021-10-09_13-31-03
  done: false
  episode_len_mean: 368.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 4
  episodes_total: 6079
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.3861506435606215
          entropy_coeff: 0.009999999999999998
          kl: 0.012137305292456012
          policy_loss: -0.09943595721075932
          total_loss: -0.1017280787229538
          vf_explained_var: -0.572163999080658
          vf_loss: 0.00019258271070915118
    num_agent_steps_sampled: 2172000
    num_agent_steps_trained: 2172000
    num_steps_sampled: 2172000
    num_steps_trained: 2172000
  iterations_since_restore: 2172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2172,54375.6,2172000,-0.04,0,-4,368.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2173000
  custom_metrics: {}
  date: 2021-10-09_13-31-26
  done: false
  episode_len_mean: 367.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6081
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.92821694082684
          entropy_coeff: 0.009999999999999998
          kl: 0.01019167354958594
          policy_loss: -0.08553113444811768
          total_loss: -0.09509714937044514
          vf_explained_var: -0.8406305909156799
          vf_loss: 0.0001630734666428503
    num_agent_steps_sampled: 2173000
    num_agent_steps_trained: 2173000
    num_steps_sampled: 2173000
    num_steps_trained: 2173000
  iterations_since_restore: 2173
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2173,54398.5,2173000,-0.04,0,-4,367.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2174000
  custom_metrics: {}
  date: 2021-10-09_13-31-51
  done: false
  episode_len_mean: 365.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6084
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.5891104578971862
          entropy_coeff: 0.009999999999999998
          kl: 0.01638057619178552
          policy_loss: -0.06950436646325721
          total_loss: -0.06988748682455884
          vf_explained_var: -0.8184674382209778
          vf_loss: 0.00015378514249783217
    num_agent_steps_sampled: 2174000
    num_agent_steps_trained: 2174000
    num_steps_sampled: 2174000
    num_steps_trained: 2174000
  iterations_since_restore: 217

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2174,54423.1,2174000,-0.04,0,-4,365.61


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2175000
  custom_metrics: {}
  date: 2021-10-09_13-32-15
  done: false
  episode_len_mean: 364.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6087
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.72783133453793
          entropy_coeff: 0.009999999999999998
          kl: 0.01247791568429335
          policy_loss: -0.1331029844780763
          total_loss: -0.1385830876727899
          vf_explained_var: -0.6995478272438049
          vf_loss: 0.00010213957822189615
    num_agent_steps_sampled: 2175000
    num_agent_steps_trained: 2175000
    num_steps_sampled: 2175000
    num_steps_trained: 2175000
  iterations_since_restore: 2175
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2175,54447.3,2175000,-0.04,0,-4,364.44




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2176000
  custom_metrics: {}
  date: 2021-10-09_13-32-55
  done: false
  episode_len_mean: 364.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6090
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7591026292906866
          entropy_coeff: 0.009999999999999998
          kl: 0.015640160840272366
          policy_loss: -0.0628382279848059
          total_loss: -0.0655885004128019
          vf_explained_var: -0.5919846296310425
          vf_loss: 0.00018057761335512623
    num_agent_steps_sampled: 2176000
    num_agent_steps_trained: 2176000
    num_steps_sampled: 2176000
    num_steps_trained: 2176000
  iterations_since_restore: 2176

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2176,54487.1,2176000,-0.04,0,-4,364.64


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2177000
  custom_metrics: {}
  date: 2021-10-09_13-33-17
  done: false
  episode_len_mean: 367.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6092
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.6315480377939013
          entropy_coeff: 0.009999999999999998
          kl: 0.013455752690734386
          policy_loss: -0.0631043731338448
          total_loss: -0.0665880829302801
          vf_explained_var: -0.7322118878364563
          vf_loss: 0.00021913235258479188
    num_agent_steps_sampled: 2177000
    num_agent_steps_trained: 2177000
    num_steps_sampled: 2177000
    num_steps_trained: 2177000
  iterations_since_restore: 2177


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2177,54508.8,2177000,-0.04,0,-4,367


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2178000
  custom_metrics: {}
  date: 2021-10-09_13-33-41
  done: false
  episode_len_mean: 366.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6095
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.509626528951857
          entropy_coeff: 0.009999999999999998
          kl: 0.012522143339213128
          policy_loss: -0.08753705248236657
          total_loss: -0.09081108938488695
          vf_explained_var: -0.20245040953159332
          vf_loss: 8.469899104157877e-05
    num_agent_steps_sampled: 2178000
    num_agent_steps_trained: 2178000
    num_steps_sampled: 2178000
    num_steps_trained: 2178000
  iterations_since_restore: 2178

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2178,54533.1,2178000,-0.04,0,-4,366.2


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2179000
  custom_metrics: {}
  date: 2021-10-09_13-34-03
  done: false
  episode_len_mean: 367.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6097
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.597696634133657
          entropy_coeff: 0.009999999999999998
          kl: 0.008616649340752779
          policy_loss: -0.032785440443290605
          total_loss: -0.04065447379317549
          vf_explained_var: -0.3659200072288513
          vf_loss: 3.1185854802768316e-05
    num_agent_steps_sampled: 2179000
    num_agent_steps_trained: 2179000
    num_steps_sampled: 2179000
    num_steps_trained: 2179000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2179,54555,2179000,-0.04,0,-4,367.37


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2180000
  custom_metrics: {}
  date: 2021-10-09_13-34-24
  done: false
  episode_len_mean: 367.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6100
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7911671002705891
          entropy_coeff: 0.009999999999999998
          kl: 0.012865300225268106
          policy_loss: -0.10119122602045535
          total_loss: -0.10697256123854054
          vf_explained_var: -0.3009490966796875
          vf_loss: 7.115393283230434e-05
    num_agent_steps_sampled: 2180000
    num_agent_steps_trained: 2180000
    num_steps_sampled: 2180000
    num_steps_trained: 2180000
  iterations_since_restore: 218

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2180,54576,2180000,-0.04,0,-4,367.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2181000
  custom_metrics: {}
  date: 2021-10-09_13-34-48
  done: false
  episode_len_mean: 367.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6103
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.769041128953298
          entropy_coeff: 0.009999999999999998
          kl: 0.012197072452245274
          policy_loss: -0.09296221000452837
          total_loss: -0.09915851218005022
          vf_explained_var: -0.6872777342796326
          vf_loss: 6.128287370504242e-05
    num_agent_steps_sampled: 2181000
    num_agent_steps_trained: 2181000
    num_steps_sampled: 2181000
    num_steps_trained: 2181000
  iterations_since_restore: 2181

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2181,54600.3,2181000,-0.04,0,-4,367.21


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2182000
  custom_metrics: {}
  date: 2021-10-09_13-35-08
  done: false
  episode_len_mean: 366.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6105
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.6746579421891106
          entropy_coeff: 0.009999999999999998
          kl: 0.011991358153388098
          policy_loss: -0.05323531325492594
          total_loss: -0.05864209085702896
          vf_explained_var: -0.27580133080482483
          vf_loss: 9.980281538951709e-05
    num_agent_steps_sampled: 2182000
    num_agent_steps_trained: 2182000
    num_steps_sampled: 2182000
    num_steps_trained: 2182000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2182,54619.6,2182000,-0.04,0,-4,366.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2183000
  custom_metrics: {}
  date: 2021-10-09_13-35-29
  done: false
  episode_len_mean: 367.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6108
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.8063611918025546
          entropy_coeff: 0.009999999999999998
          kl: 0.014556574602847998
          policy_loss: -0.14502371778297757
          total_loss: -0.14940014720583955
          vf_explained_var: -0.777068018913269
          vf_loss: 4.269992856886044e-05
    num_agent_steps_sampled: 2183000
    num_agent_steps_trained: 2183000
    num_steps_sampled: 2183000
    num_steps_trained: 2183000
  iterations_since_restore: 2183

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2183,54641.1,2183000,-0.04,0,-4,367.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2184000
  custom_metrics: {}
  date: 2021-10-09_13-35-50
  done: false
  episode_len_mean: 369.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6110
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7943923075993855
          entropy_coeff: 0.009999999999999998
          kl: 0.014209787632876596
          policy_loss: -0.06651537054114871
          total_loss: -0.07110276362962192
          vf_explained_var: -0.46150848269462585
          vf_loss: 3.71025146402341e-05
    num_agent_steps_sampled: 2184000
    num_agent_steps_trained: 2184000
    num_steps_sampled: 2184000
    num_steps_trained: 2184000
  iterations_since_restore: 218

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2184,54661.8,2184000,-0.04,0,-4,369.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2185000
  custom_metrics: {}
  date: 2021-10-09_13-36-10
  done: false
  episode_len_mean: 369.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 6112
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.8138855603006152
          entropy_coeff: 0.009999999999999998
          kl: 0.010749004084045468
          policy_loss: -0.08730908075554503
          total_loss: -0.09528327224155267
          vf_explained_var: -1.0
          vf_loss: 8.917573310706454e-05
    num_agent_steps_sampled: 2185000
    num_agent_steps_trained: 2185000
    num_steps_sampled: 2185000
    num_steps_trained: 2185000
  iterations_since_restore: 2185
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2185,54681.5,2185000,-0.04,0,-4,369.75


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2186000
  custom_metrics: {}
  date: 2021-10-09_13-36-32
  done: false
  episode_len_mean: 370.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 6115
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7354434887568155
          entropy_coeff: 0.009999999999999998
          kl: 0.013045267506123438
          policy_loss: -0.07935238439175818
          total_loss: -0.08443357990019851
          vf_explained_var: -0.5595917105674744
          vf_loss: 4.536498836387182e-05
    num_agent_steps_sampled: 2186000
    num_agent_steps_trained: 2186000
    num_steps_sampled: 2186000
    num_steps_trained: 2186000
  iterations_since_restore: 218

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2186,54704.1,2186000,-0.04,0,-4,370.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2187000
  custom_metrics: {}
  date: 2021-10-09_13-36-50
  done: false
  episode_len_mean: 371.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6117
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.6970789829889934
          entropy_coeff: 0.009999999999999998
          kl: 0.014027601459565235
          policy_loss: -0.09175314779082934
          total_loss: -0.09546734657552507
          vf_explained_var: -0.5777984857559204
          vf_loss: 0.00010793502951855771
    num_agent_steps_sampled: 2187000
    num_agent_steps_trained: 2187000
    num_steps_sampled: 2187000
    num_steps_trained: 2187000
  iterations_since_restore: 2187


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2187,54722.1,2187000,0,0,0,371.19




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2188000
  custom_metrics: {}
  date: 2021-10-09_13-37-27
  done: false
  episode_len_mean: 374.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6120
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7265797071986728
          entropy_coeff: 0.009999999999999998
          kl: 0.007448668626995196
          policy_loss: -0.006295852404501703
          total_loss: -0.016554692165098257
          vf_explained_var: -0.7460110187530518
          vf_loss: 2.5009774114753074e-05
    num_agent_steps_sampled: 2188000
    num_agent_steps_trained: 2188000
    num_steps_sampled: 2188000
    num_steps_trained: 2188000
  iterations_since_restore: 218

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2188,54759,2188000,0,0,0,374.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2189000
  custom_metrics: {}
  date: 2021-10-09_13-37-47
  done: false
  episode_len_mean: 375.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6122
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.7615579697820876
          entropy_coeff: 0.009999999999999998
          kl: 0.012389524770528196
          policy_loss: -0.09404413890507486
          total_loss: -0.09997364866236845
          vf_explained_var: -0.8056393265724182
          vf_loss: 7.285121650460901e-05
    num_agent_steps_sampled: 2189000
    num_agent_steps_trained: 2189000
    num_steps_sampled: 2189000
    num_steps_trained: 2189000
  iterations_since_restore: 2189
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2189,54778.6,2189000,0,0,0,375.59


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2190000
  custom_metrics: {}
  date: 2021-10-09_13-38-08
  done: false
  episode_len_mean: 377.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6124
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.5514227350552876
          entropy_coeff: 0.009999999999999998
          kl: 0.00933970950337467
          policy_loss: -0.07020633065452178
          total_loss: -0.07691149543970824
          vf_explained_var: -0.8189579248428345
          vf_loss: 5.456327878871687e-05
    num_agent_steps_sampled: 2190000
    num_agent_steps_trained: 2190000
    num_steps_sampled: 2190000
    num_steps_trained: 2190000
  iterations_since_restore: 2190
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2190,54799.6,2190000,0,0,0,377.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2191000
  custom_metrics: {}
  date: 2021-10-09_13-38-31
  done: false
  episode_len_mean: 376.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6127
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.687816768222385
          entropy_coeff: 0.009999999999999998
          kl: 0.011202901170790824
          policy_loss: -0.07125681628369623
          total_loss: -0.07757494567583005
          vf_explained_var: -0.9354314208030701
          vf_loss: 5.909305727982428e-05
    num_agent_steps_sampled: 2191000
    num_agent_steps_trained: 2191000
    num_steps_sampled: 2191000
    num_steps_trained: 2191000
  iterations_since_restore: 2191
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2191,54822.5,2191000,0,0,0,376.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2192000
  custom_metrics: {}
  date: 2021-10-09_13-38-53
  done: false
  episode_len_mean: 378.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6130
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.549998950958252
          entropy_coeff: 0.009999999999999998
          kl: 0.009960106471151104
          policy_loss: -0.06850078931699197
          total_loss: -0.07442783410660922
          vf_explained_var: -0.7341557145118713
          vf_loss: 0.00023692016362070314
    num_agent_steps_sampled: 2192000
    num_agent_steps_trained: 2192000
    num_steps_sampled: 2192000
    num_steps_trained: 2192000
  iterations_since_restore: 2192
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2192,54845,2192000,0,0,0,378.44


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2193000
  custom_metrics: {}
  date: 2021-10-09_13-39-16
  done: false
  episode_len_mean: 378.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6132
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.6675938381089104
          entropy_coeff: 0.009999999999999998
          kl: 0.012598252330649136
          policy_loss: -0.09765391298052338
          total_loss: -0.102473702467978
          vf_explained_var: -0.5104542374610901
          vf_loss: 4.7283139186523236e-05
    num_agent_steps_sampled: 2193000
    num_agent_steps_trained: 2193000
    num_steps_sampled: 2193000
    num_steps_trained: 2193000
  iterations_since_restore: 2193
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2193,54867.6,2193000,0,0,0,378.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2194000
  custom_metrics: {}
  date: 2021-10-09_13-39-40
  done: false
  episode_len_mean: 379.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6135
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.750727325015598
          entropy_coeff: 0.009999999999999998
          kl: 0.012257956036451705
          policy_loss: -0.06869616934822666
          total_loss: -0.07467013456755214
          vf_explained_var: -0.9183046817779541
          vf_loss: 4.3413879514749475e-05
    num_agent_steps_sampled: 2194000
    num_agent_steps_trained: 2194000
    num_steps_sampled: 2194000
    num_steps_trained: 2194000
  iterations_since_restore: 2194
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2194,54892,2194000,0,0,0,379.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2195000
  custom_metrics: {}
  date: 2021-10-09_13-40-02
  done: false
  episode_len_mean: 379.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6138
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.9569346931245593
          entropy_coeff: 0.009999999999999998
          kl: 0.005543600087558629
          policy_loss: -0.028286136893762484
          total_loss: -0.042628717205176754
          vf_explained_var: -0.978523850440979
          vf_loss: 3.0518455009263965e-05
    num_agent_steps_sampled: 2195000
    num_agent_steps_trained: 2195000
    num_steps_sampled: 2195000
    num_steps_trained: 2195000
  iterations_since_restore: 2195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2195,54913.5,2195000,0,0,0,379.99


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2196000
  custom_metrics: {}
  date: 2021-10-09_13-40-25
  done: false
  episode_len_mean: 381.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6140
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.813429590066274
          entropy_coeff: 0.009999999999999998
          kl: 0.016465240854639802
          policy_loss: -0.062429039552807805
          total_loss: -0.06508032327724828
          vf_explained_var: -0.6402246356010437
          vf_loss: 4.9453395533621205e-05
    num_agent_steps_sampled: 2196000
    num_agent_steps_trained: 2196000
    num_steps_sampled: 2196000
    num_steps_trained: 2196000
  iterations_since_restore: 2196


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2196,54936.4,2196000,0,0,0,381.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2197000
  custom_metrics: {}
  date: 2021-10-09_13-40-50
  done: false
  episode_len_mean: 377.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6143
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.495314551062054
          entropy_coeff: 0.009999999999999998
          kl: 0.009101362457189862
          policy_loss: -0.03510390143427584
          total_loss: -0.040946941367454
          vf_explained_var: -0.8250293135643005
          vf_loss: 0.0005790210372348762
    num_agent_steps_sampled: 2197000
    num_agent_steps_trained: 2197000
    num_steps_sampled: 2197000
    num_steps_trained: 2197000
  iterations_since_restore: 2197
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2197,54961.4,2197000,0,0,0,377.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2198000
  custom_metrics: {}
  date: 2021-10-09_13-41-10
  done: false
  episode_len_mean: 380.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6146
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.8362702541881137
          entropy_coeff: 0.009999999999999998
          kl: 0.011708999817216794
          policy_loss: -0.10808265490664376
          total_loss: -0.11543590198788378
          vf_explained_var: -0.8727906942367554
          vf_loss: 3.4121316427141814e-05
    num_agent_steps_sampled: 2198000
    num_agent_steps_trained: 2198000
    num_steps_sampled: 2198000
    num_steps_trained: 2198000
  iterations_since_restore: 2198


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2198,54981.6,2198000,0,0,0,380.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2199000
  custom_metrics: {}
  date: 2021-10-09_13-41-38
  done: false
  episode_len_mean: 377.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6149
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.2262468258539836
          entropy_coeff: 0.009999999999999998
          kl: 0.008695286557128491
          policy_loss: -0.02559141777455807
          total_loss: -0.029683317533797687
          vf_explained_var: -0.6965359449386597
          vf_loss: 2.011319371983215e-05
    num_agent_steps_sampled: 2199000
    num_agent_steps_trained: 2199000
    num_steps_sampled: 2199000
    num_steps_trained: 2199000
  iterations_since_restore: 2199


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2199,55009.5,2199000,0,0,0,377.04




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2200000
  custom_metrics: {}
  date: 2021-10-09_13-42-20
  done: false
  episode_len_mean: 377.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6152
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.6326356357998317
          entropy_coeff: 0.009999999999999998
          kl: 0.015061317218965956
          policy_loss: -0.09389123175707129
          total_loss: -0.09606066962911022
          vf_explained_var: -0.956635594367981
          vf_loss: 3.9318384652789166e-05
    num_agent_steps_sampled: 2200000
    num_agent_steps_trained: 2200000
    num_steps_sampled: 2200000
    num_steps_trained: 2200000
  iterations_since_restore: 2200
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2200,55051.3,2200000,0,0,0,377.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2201000
  custom_metrics: {}
  date: 2021-10-09_13-42-43
  done: false
  episode_len_mean: 375.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6155
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.8149878157509698
          entropy_coeff: 0.009999999999999998
          kl: 0.010787216479208755
          policy_loss: -0.0849183929980629
          total_loss: -0.09289655842714839
          vf_explained_var: -0.9078487157821655
          vf_loss: 6.040281081772668e-05
    num_agent_steps_sampled: 2201000
    num_agent_steps_trained: 2201000
    num_steps_sampled: 2201000
    num_steps_trained: 2201000
  iterations_since_restore: 2201
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2201,55074.7,2201000,0,0,0,375.86


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2202000
  custom_metrics: {}
  date: 2021-10-09_13-43-07
  done: false
  episode_len_mean: 375.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6157
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.5875723944769966
          entropy_coeff: 0.009999999999999998
          kl: 0.025402805626056117
          policy_loss: -0.06290896534919739
          total_loss: -0.054556876814199816
          vf_explained_var: -0.6541520357131958
          vf_loss: 0.00041670615786036554
    num_agent_steps_sampled: 2202000
    num_agent_steps_trained: 2202000
    num_steps_sampled: 2202000
    num_steps_trained: 2202000
  iterations_since_restore: 2202

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2202,55098.8,2202000,0,0,0,375.32


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2203000
  custom_metrics: {}
  date: 2021-10-09_13-43-27
  done: false
  episode_len_mean: 378.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6160
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7851732982529533
          entropy_coeff: 0.009999999999999998
          kl: 0.005097625592788152
          policy_loss: -0.09961604825738403
          total_loss: -0.11026464241246382
          vf_explained_var: -0.7173648476600647
          vf_loss: 3.58153166505954e-05
    num_agent_steps_sampled: 2203000
    num_agent_steps_trained: 2203000
    num_steps_sampled: 2203000
    num_steps_trained: 2203000
  iterations_since_restore: 2203
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2203,55118.6,2203000,0,0,0,378.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2204000
  custom_metrics: {}
  date: 2021-10-09_13-43-48
  done: false
  episode_len_mean: 380.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6162
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.9009988917244804
          entropy_coeff: 0.009999999999999998
          kl: 0.009017042727569407
          policy_loss: -0.08828984153353506
          total_loss: -0.09455790925357077
          vf_explained_var: -0.4526161551475525
          vf_loss: 6.384337205721143e-05
    num_agent_steps_sampled: 2204000
    num_agent_steps_trained: 2204000
    num_steps_sampled: 2204000
    num_steps_trained: 2204000
  iterations_since_restore: 2204
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2204,55139.3,2204000,0,0,0,380.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2205000
  custom_metrics: {}
  date: 2021-10-09_13-44-08
  done: false
  episode_len_mean: 382.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6165
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8803666909535726
          entropy_coeff: 0.009999999999999998
          kl: 0.0079135504649689
          policy_loss: -0.057797738682064746
          total_loss: -0.06542945504188538
          vf_explained_var: -0.885703444480896
          vf_loss: 4.53976445997897e-05
    num_agent_steps_sampled: 2205000
    num_agent_steps_trained: 2205000
    num_steps_sampled: 2205000
    num_steps_trained: 2205000
  iterations_since_restore: 2205
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2205,55159.5,2205000,0,0,0,382.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2206000
  custom_metrics: {}
  date: 2021-10-09_13-44-32
  done: false
  episode_len_mean: 380.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6167
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8375127249293857
          entropy_coeff: 0.009999999999999998
          kl: 0.0073433796385175614
          policy_loss: -0.03744538115958373
          total_loss: -0.045464236641095744
          vf_explained_var: -0.1300857812166214
          vf_loss: 3.138984245399317e-05
    num_agent_steps_sampled: 2206000
    num_agent_steps_trained: 2206000
    num_steps_sampled: 2206000
    num_steps_trained: 2206000
  iterations_since_restore: 2206
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2206,55183.2,2206000,0,0,0,380.25


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2207000
  custom_metrics: {}
  date: 2021-10-09_13-44-50
  done: false
  episode_len_mean: 383.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6170
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.9603555308447944
          entropy_coeff: 0.009999999999999998
          kl: 0.008668641795227295
          policy_loss: -0.1023853395651612
          total_loss: -0.10971853455735578
          vf_explained_var: -0.9548587203025818
          vf_loss: 8.214164556395392e-05
    num_agent_steps_sampled: 2207000
    num_agent_steps_trained: 2207000
    num_steps_sampled: 2207000
    num_steps_trained: 2207000
  iterations_since_restore: 2207
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2207,55201.5,2207000,0,0,0,383.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2208000
  custom_metrics: {}
  date: 2021-10-09_13-45-11
  done: false
  episode_len_mean: 383.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6172
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.845840338865916
          entropy_coeff: 0.009999999999999998
          kl: 0.00828165501649332
          policy_loss: -0.07178416239718596
          total_loss: -0.07852260015077062
          vf_explained_var: -0.604774534702301
          vf_loss: 7.58551597553176e-05
    num_agent_steps_sampled: 2208000
    num_agent_steps_trained: 2208000
    num_steps_sampled: 2208000
    num_steps_trained: 2208000
  iterations_since_restore: 2208
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2208,55222.5,2208000,0,0,0,383.55


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2209000
  custom_metrics: {}
  date: 2021-10-09_13-45-34
  done: false
  episode_len_mean: 383.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6175
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 2.062662597497304
          entropy_coeff: 0.009999999999999998
          kl: 0.008596842537758359
          policy_loss: -0.05980316350857417
          total_loss: -0.06832549663053619
          vf_explained_var: -0.8553141951560974
          vf_loss: 1.7025165066621007e-05
    num_agent_steps_sampled: 2209000
    num_agent_steps_trained: 2209000
    num_steps_sampled: 2209000
    num_steps_trained: 2209000
  iterations_since_restore: 2209
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2209,55245.9,2209000,0,0,0,383.26


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2210000
  custom_metrics: {}
  date: 2021-10-09_13-45-58
  done: false
  episode_len_mean: 383.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6177
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8304165138138666
          entropy_coeff: 0.009999999999999998
          kl: 0.011314747940636138
          policy_loss: -0.10807167895966106
          total_loss: -0.11044293886257542
          vf_explained_var: -0.5207492113113403
          vf_loss: 2.422783053488173e-05
    num_agent_steps_sampled: 2210000
    num_agent_steps_trained: 2210000
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
  iterations_since_restore: 2210
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2210,55269,2210000,0,0,0,383.33




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2211000
  custom_metrics: {}
  date: 2021-10-09_13-46-33
  done: false
  episode_len_mean: 385.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6180
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7397284706433613
          entropy_coeff: 0.009999999999999998
          kl: 0.005991394931245993
          policy_loss: -0.07276802712844478
          total_loss: -0.08173406525101098
          vf_explained_var: -0.7901771664619446
          vf_loss: 7.272068901927516e-06
    num_agent_steps_sampled: 2211000
    num_agent_steps_trained: 2211000
    num_steps_sampled: 2211000
    num_steps_trained: 2211000
  iterations_since_restore: 2211
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2211,55304.1,2211000,0,0,0,385.51


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2212000
  custom_metrics: {}
  date: 2021-10-09_13-46-58
  done: false
  episode_len_mean: 384.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6183
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7477370566791959
          entropy_coeff: 0.009999999999999998
          kl: 0.005665516603984915
          policy_loss: -0.16946440810958543
          total_loss: -0.17896139282319282
          vf_explained_var: -0.7174447178840637
          vf_loss: 1.459678572549213e-05
    num_agent_steps_sampled: 2212000
    num_agent_steps_trained: 2212000
    num_steps_sampled: 2212000
    num_steps_trained: 2212000
  iterations_since_restore: 2212
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2212,55329.5,2212000,0,0,0,384.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2213000
  custom_metrics: {}
  date: 2021-10-09_13-47-18
  done: false
  episode_len_mean: 386.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6185
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.841645356019338
          entropy_coeff: 0.009999999999999998
          kl: 0.008226592850006369
          policy_loss: -0.14388681459758018
          total_loss: -0.1507052721662654
          vf_explained_var: -0.7800365090370178
          vf_loss: 3.1302624029194705e-05
    num_agent_steps_sampled: 2213000
    num_agent_steps_trained: 2213000
    num_steps_sampled: 2213000
    num_steps_trained: 2213000
  iterations_since_restore: 2213
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2213,55349.6,2213000,0,0,0,386.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2214000
  custom_metrics: {}
  date: 2021-10-09_13-47-38
  done: false
  episode_len_mean: 387.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6187
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 2.0246052609549627
          entropy_coeff: 0.009999999999999998
          kl: 0.007728034049635074
          policy_loss: -0.10581989917490217
          total_loss: -0.11519146052499613
          vf_explained_var: -0.9834377765655518
          vf_loss: 8.779372092249104e-06
    num_agent_steps_sampled: 2214000
    num_agent_steps_trained: 2214000
    num_steps_sampled: 2214000
    num_steps_trained: 2214000
  iterations_since_restore: 2214
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2214,55369.7,2214000,0,0,0,387.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2215000
  custom_metrics: {}
  date: 2021-10-09_13-47-59
  done: false
  episode_len_mean: 389.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6190
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8820245358679029
          entropy_coeff: 0.009999999999999998
          kl: 0.006098722648770523
          policy_loss: -0.02112301624276572
          total_loss: -0.03133676695740885
          vf_explained_var: -0.8781288862228394
          vf_loss: 3.161432636665672e-05
    num_agent_steps_sampled: 2215000
    num_agent_steps_trained: 2215000
    num_steps_sampled: 2215000
    num_steps_trained: 2215000
  iterations_since_restore: 2215
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2215,55390.7,2215000,0,0,0,389.13


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2216000
  custom_metrics: {}
  date: 2021-10-09_13-48-20
  done: false
  episode_len_mean: 388.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6192
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.910293832090166
          entropy_coeff: 0.009999999999999998
          kl: 0.009492650925041346
          policy_loss: -0.11084193566607105
          total_loss: -0.11657450633744398
          vf_explained_var: -0.9495803117752075
          vf_loss: 2.3582206399118553e-05
    num_agent_steps_sampled: 2216000
    num_agent_steps_trained: 2216000
    num_steps_sampled: 2216000
    num_steps_trained: 2216000
  iterations_since_restore: 2216
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2216,55410.9,2216000,0,0,0,388.49


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2217000
  custom_metrics: {}
  date: 2021-10-09_13-48-38
  done: false
  episode_len_mean: 389.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6194
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7350464781125388
          entropy_coeff: 0.009999999999999998
          kl: 0.007436574427011896
          policy_loss: -0.04245408098730776
          total_loss: -0.04933744710352686
          vf_explained_var: -0.8530519008636475
          vf_loss: 1.1183441210960155e-05
    num_agent_steps_sampled: 2217000
    num_agent_steps_trained: 2217000
    num_steps_sampled: 2217000
    num_steps_trained: 2217000
  iterations_since_restore: 2217
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2217,55429.4,2217000,0,0,0,389.76


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2218000
  custom_metrics: {}
  date: 2021-10-09_13-49-00
  done: false
  episode_len_mean: 389.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6197
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.626267467604743
          entropy_coeff: 0.009999999999999998
          kl: 0.008056477271780837
          policy_loss: -0.07710918562693728
          total_loss: -0.0819095115073853
          vf_explained_var: -0.605180561542511
          vf_loss: 0.0001348383104211987
    num_agent_steps_sampled: 2218000
    num_agent_steps_trained: 2218000
    num_steps_sampled: 2218000
    num_steps_trained: 2218000
  iterations_since_restore: 2218
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2218,55450.8,2218000,0,0,0,389.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2219000
  custom_metrics: {}
  date: 2021-10-09_13-49-20
  done: false
  episode_len_mean: 392.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6199
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8760836839675903
          entropy_coeff: 0.009999999999999998
          kl: 0.0067401159160483575
          policy_loss: -0.10881136175658968
          total_loss: -0.11807999658501811
          vf_explained_var: -0.5936011672019958
          vf_loss: 1.551292480459981e-05
    num_agent_steps_sampled: 2219000
    num_agent_steps_trained: 2219000
    num_steps_sampled: 2219000
    num_steps_trained: 2219000
  iterations_since_restore: 2219
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2219,55471.1,2219000,0,0,0,392.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2220000
  custom_metrics: {}
  date: 2021-10-09_13-49-36
  done: false
  episode_len_mean: 394.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6202
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.9016650782691107
          entropy_coeff: 0.009999999999999998
          kl: 0.006496808055020588
          policy_loss: -0.07147155179538661
          total_loss: -0.08134506986890402
          vf_explained_var: -0.6122839450836182
          vf_loss: 8.538491212246097e-06
    num_agent_steps_sampled: 2220000
    num_agent_steps_trained: 2220000
    num_steps_sampled: 2220000
    num_steps_trained: 2220000
  iterations_since_restore: 2220
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2220,55487.7,2220000,0,0,0,394.07


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2221000
  custom_metrics: {}
  date: 2021-10-09_13-50-00
  done: false
  episode_len_mean: 393.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6204
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7613807916641235
          entropy_coeff: 0.009999999999999998
          kl: 0.007336482663393282
          policy_loss: -0.0673343319238888
          total_loss: -0.07461816565030151
          vf_explained_var: -0.7037199139595032
          vf_loss: 1.4785542200317852e-05
    num_agent_steps_sampled: 2221000
    num_agent_steps_trained: 2221000
    num_steps_sampled: 2221000
    num_steps_trained: 2221000
  iterations_since_restore: 2221
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2221,55511.2,2221000,0,0,0,393.6


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2222000
  custom_metrics: {}
  date: 2021-10-09_13-50-22
  done: false
  episode_len_mean: 393.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6207
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.869133558538225
          entropy_coeff: 0.009999999999999998
          kl: 0.007883686139611242
          policy_loss: -0.10085743284887738
          total_loss: -0.10845177931090196
          vf_explained_var: -0.8822969198226929
          vf_loss: 1.2428805838579945e-05
    num_agent_steps_sampled: 2222000
    num_agent_steps_trained: 2222000
    num_steps_sampled: 2222000
    num_steps_trained: 2222000
  iterations_since_restore: 2222
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2222,55533,2222000,0,0,0,393.15




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2223000
  custom_metrics: {}
  date: 2021-10-09_13-51-04
  done: false
  episode_len_mean: 389.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6210
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8653545339902242
          entropy_coeff: 0.009999999999999998
          kl: 0.00734354486167644
          policy_loss: -0.054240027111437585
          total_loss: -0.06254262949029604
          vf_explained_var: -0.3559887409210205
          vf_loss: 2.5826870528867907e-05
    num_agent_steps_sampled: 2223000
    num_agent_steps_trained: 2223000
    num_steps_sampled: 2223000
    num_steps_trained: 2223000
  iterations_since_restore: 2223
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2223,55575.1,2223000,0,0,0,389.95


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2224000
  custom_metrics: {}
  date: 2021-10-09_13-51-31
  done: false
  episode_len_mean: 386.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6213
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7556234942542182
          entropy_coeff: 0.009999999999999998
          kl: 0.00834875864479285
          policy_loss: -0.057072661444544794
          total_loss: -0.06287940310107337
          vf_explained_var: -0.13420650362968445
          vf_loss: 1.1030368765811242e-05
    num_agent_steps_sampled: 2224000
    num_agent_steps_trained: 2224000
    num_steps_sampled: 2224000
    num_steps_trained: 2224000
  iterations_since_restore: 2224
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2224,55602,2224000,0,0,0,386.71


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2225000
  custom_metrics: {}
  date: 2021-10-09_13-51-55
  done: false
  episode_len_mean: 386.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6216
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.9808762298689948
          entropy_coeff: 0.009999999999999998
          kl: 0.008274972721478157
          policy_loss: -0.12479787241253588
          total_loss: -0.13295605861478382
          vf_explained_var: -1.0
          vf_loss: 1.5860606142976192e-05
    num_agent_steps_sampled: 2225000
    num_agent_steps_trained: 2225000
    num_steps_sampled: 2225000
    num_steps_trained: 2225000
  iterations_since_restore: 2225
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2225,55626.6,2225000,0,0,0,386.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2226000
  custom_metrics: {}
  date: 2021-10-09_13-52-18
  done: false
  episode_len_mean: 383.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6218
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8440495000945196
          entropy_coeff: 0.009999999999999998
          kl: 0.00985743012636896
          policy_loss: -0.04025083800984754
          total_loss: -0.04481641853021251
          vf_explained_var: -0.9712117314338684
          vf_loss: 1.5246986125324232e-05
    num_agent_steps_sampled: 2226000
    num_agent_steps_trained: 2226000
    num_steps_sampled: 2226000
    num_steps_trained: 2226000
  iterations_since_restore: 2226
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2226,55649.5,2226000,0,0,0,383.84


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2227000
  custom_metrics: {}
  date: 2021-10-09_13-52-45
  done: false
  episode_len_mean: 381.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 6222
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 2.002534708711836
          entropy_coeff: 0.009999999999999998
          kl: 0.007375602011906956
          policy_loss: -0.05316890683025122
          total_loss: -0.06281378213316202
          vf_explained_var: -0.72187340259552
          vf_loss: 1.0284815122455863e-05
    num_agent_steps_sampled: 2227000
    num_agent_steps_trained: 2227000
    num_steps_sampled: 2227000
    num_steps_trained: 2227000
  iterations_since_restore: 2227
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2227,55676,2227000,0,0,0,381.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2228000
  custom_metrics: {}
  date: 2021-10-09_13-53-06
  done: false
  episode_len_mean: 380.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6224
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.9310290892918904
          entropy_coeff: 0.009999999999999998
          kl: 0.008246816149912739
          policy_loss: -0.12137962132692337
          total_loss: -0.12907575791080791
          vf_explained_var: -0.7489047646522522
          vf_loss: 1.9027004438814604e-05
    num_agent_steps_sampled: 2228000
    num_agent_steps_trained: 2228000
    num_steps_sampled: 2228000
    num_steps_trained: 2228000
  iterations_since_restore: 2228
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2228,55696.8,2228000,0,0,0,380.77


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2229000
  custom_metrics: {}
  date: 2021-10-09_13-53-29
  done: false
  episode_len_mean: 382.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6226
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.9649935947524177
          entropy_coeff: 0.009999999999999998
          kl: 0.00768126282858979
          policy_loss: -0.07634568934639295
          total_loss: -0.08515493534505367
          vf_explained_var: -0.7460266947746277
          vf_loss: 4.074065560012564e-05
    num_agent_steps_sampled: 2229000
    num_agent_steps_trained: 2229000
    num_steps_sampled: 2229000
    num_steps_trained: 2229000
  iterations_since_restore: 2229
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2229,55719.8,2229000,0,0,0,382.08


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2230000
  custom_metrics: {}
  date: 2021-10-09_13-53-54
  done: false
  episode_len_mean: 381.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6229
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7608739958869086
          entropy_coeff: 0.009999999999999998
          kl: 0.009336001753018448
          policy_loss: -0.06676048539164993
          total_loss: -0.0712203725344605
          vf_explained_var: -0.7463920712471008
          vf_loss: 2.231737782696857e-05
    num_agent_steps_sampled: 2230000
    num_agent_steps_trained: 2230000
    num_steps_sampled: 2230000
    num_steps_trained: 2230000
  iterations_since_restore: 2230
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2230,55745.3,2230000,0,0,0,381.05


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2231000
  custom_metrics: {}
  date: 2021-10-09_13-54-18
  done: false
  episode_len_mean: 381.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6232
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 2.003359219763014
          entropy_coeff: 0.009999999999999998
          kl: 0.008900913354390678
          policy_loss: -0.10940637112491661
          total_loss: -0.11691155747200052
          vf_explained_var: -0.5926617383956909
          vf_loss: 1.3613457794134672e-05
    num_agent_steps_sampled: 2231000
    num_agent_steps_trained: 2231000
    num_steps_sampled: 2231000
    num_steps_trained: 2231000
  iterations_since_restore: 2231
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2231,55769.2,2231000,0,0,0,381.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2232000
  custom_metrics: {}
  date: 2021-10-09_13-54-38
  done: false
  episode_len_mean: 381.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6234
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.9098302629258899
          entropy_coeff: 0.009999999999999998
          kl: 0.007552522555565646
          policy_loss: 0.028118297478391063
          total_loss: 0.01965811126348045
          vf_explained_var: -0.5871198177337646
          vf_loss: 1.9176483409763832e-05
    num_agent_steps_sampled: 2232000
    num_agent_steps_trained: 2232000
    num_steps_sampled: 2232000
    num_steps_trained: 2232000
  iterations_since_restore: 2232
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2232,55788.9,2232000,0,0,0,381.12


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2233000
  custom_metrics: {}
  date: 2021-10-09_13-55-03
  done: false
  episode_len_mean: 381.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6237
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.906477333439721
          entropy_coeff: 0.009999999999999998
          kl: 0.009779046579408332
          policy_loss: -0.09569713626470831
          total_loss: -0.10097776154677073
          vf_explained_var: -0.24667538702487946
          vf_loss: 3.4685874233117726e-05
    num_agent_steps_sampled: 2233000
    num_agent_steps_trained: 2233000
    num_steps_sampled: 2233000
    num_steps_trained: 2233000
  iterations_since_restore: 2233
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2233,55814.3,2233000,0,0,0,381.96




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2234000
  custom_metrics: {}
  date: 2021-10-09_13-55-45
  done: false
  episode_len_mean: 380.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6240
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8930156588554383
          entropy_coeff: 0.009999999999999998
          kl: 0.009630161169958127
          policy_loss: -0.08749853554699157
          total_loss: -0.09285989069483347
          vf_explained_var: -0.6548614501953125
          vf_loss: 2.867340668267894e-05
    num_agent_steps_sampled: 2234000
    num_agent_steps_trained: 2234000
    num_steps_sampled: 2234000
    num_steps_trained: 2234000
  iterations_since_restore: 2234
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2234,55855.6,2234000,0,0,0,380.45


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2235000
  custom_metrics: {}
  date: 2021-10-09_13-56-08
  done: false
  episode_len_mean: 382.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6243
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8773285733328926
          entropy_coeff: 0.009999999999999998
          kl: 0.009991402431957466
          policy_loss: -0.08538589355432324
          total_loss: -0.09007973178393311
          vf_explained_var: -0.4429772198200226
          vf_loss: 3.14116269287802e-05
    num_agent_steps_sampled: 2235000
    num_agent_steps_trained: 2235000
    num_steps_sampled: 2235000
    num_steps_trained: 2235000
  iterations_since_restore: 2235
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2235,55879.3,2235000,0,0,0,382.33


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2236000
  custom_metrics: {}
  date: 2021-10-09_13-56-32
  done: false
  episode_len_mean: 379.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6246
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7787089347839355
          entropy_coeff: 0.009999999999999998
          kl: 0.006745253957754392
          policy_loss: -0.15758906114432547
          total_loss: -0.16586326269639862
          vf_explained_var: -0.31109336018562317
          vf_loss: 2.897593787262546e-05
    num_agent_steps_sampled: 2236000
    num_agent_steps_trained: 2236000
    num_steps_sampled: 2236000
    num_steps_trained: 2236000
  iterations_since_restore: 2236
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2236,55903.3,2236000,0,0,0,379.93


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2237000
  custom_metrics: {}
  date: 2021-10-09_13-56-52
  done: false
  episode_len_mean: 382.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6248
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.702760234143999
          entropy_coeff: 0.009999999999999998
          kl: 0.00966355135940182
          policy_loss: -0.047821283236973815
          total_loss: -0.05124608665290806
          vf_explained_var: -0.24150867760181427
          vf_loss: 1.572230796682561e-05
    num_agent_steps_sampled: 2237000
    num_agent_steps_trained: 2237000
    num_steps_sampled: 2237000
    num_steps_trained: 2237000
  iterations_since_restore: 2237
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2237,55923.4,2237000,0,0,0,382.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2238000
  custom_metrics: {}
  date: 2021-10-09_13-57-17
  done: false
  episode_len_mean: 382.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6251
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.9015979382726882
          entropy_coeff: 0.009999999999999998
          kl: 0.010597039120841246
          policy_loss: -0.08352909353044298
          total_loss: -0.08763371528022819
          vf_explained_var: -0.22381852567195892
          vf_loss: 1.1789575076666854e-05
    num_agent_steps_sampled: 2238000
    num_agent_steps_trained: 2238000
    num_steps_sampled: 2238000
    num_steps_trained: 2238000
  iterations_since_restore: 2238
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2238,55947.4,2238000,0,0,0,382.94


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2239000
  custom_metrics: {}
  date: 2021-10-09_13-57-42
  done: false
  episode_len_mean: 382.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6254
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.798365721437666
          entropy_coeff: 0.009999999999999998
          kl: 0.005932042354611156
          policy_loss: -0.024804547201428148
          total_loss: -0.03440388072695997
          vf_explained_var: -0.8234062790870667
          vf_loss: 4.3797101246430735e-05
    num_agent_steps_sampled: 2239000
    num_agent_steps_trained: 2239000
    num_steps_sampled: 2239000
    num_steps_trained: 2239000
  iterations_since_restore: 2239
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2239,55972.7,2239000,0,0,0,382.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2240000
  custom_metrics: {}
  date: 2021-10-09_13-58-05
  done: false
  episode_len_mean: 383.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6257
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8575715594821507
          entropy_coeff: 0.009999999999999998
          kl: 0.007063827146338575
          policy_loss: -0.06834361695994934
          total_loss: -0.07696381409962973
          vf_explained_var: -0.5765761733055115
          vf_loss: 2.3688558586501233e-05
    num_agent_steps_sampled: 2240000
    num_agent_steps_trained: 2240000
    num_steps_sampled: 2240000
    num_steps_trained: 2240000
  iterations_since_restore: 2240
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2240,55996,2240000,0,0,0,383.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2241000
  custom_metrics: {}
  date: 2021-10-09_13-58-28
  done: false
  episode_len_mean: 381.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6259
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8498374342918396
          entropy_coeff: 0.009999999999999998
          kl: 0.008461004385275849
          policy_loss: -0.053844401530093615
          total_loss: -0.06042486656871107
          vf_explained_var: -0.662356972694397
          vf_loss: 2.163228652231434e-05
    num_agent_steps_sampled: 2241000
    num_agent_steps_trained: 2241000
    num_steps_sampled: 2241000
    num_steps_trained: 2241000
  iterations_since_restore: 2241
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2241,56018.9,2241000,0,0,0,381.7


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2242000
  custom_metrics: {}
  date: 2021-10-09_13-58-50
  done: false
  episode_len_mean: 381.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6262
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.778980142540402
          entropy_coeff: 0.009999999999999998
          kl: 0.00784089279379955
          policy_loss: 0.019041503220796584
          total_loss: 0.012307412322196695
          vf_explained_var: -0.2737424373626709
          vf_loss: 3.131759387744953e-05
    num_agent_steps_sampled: 2242000
    num_agent_steps_trained: 2242000
    num_steps_sampled: 2242000
    num_steps_trained: 2242000
  iterations_since_restore: 2242
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2242,56041.2,2242000,0,0,0,381.19


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2243000
  custom_metrics: {}
  date: 2021-10-09_13-59-12
  done: false
  episode_len_mean: 380.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6265
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.6980475438965692
          entropy_coeff: 0.009999999999999998
          kl: 0.00861256182278696
          policy_loss: -0.08272370936142073
          total_loss: -0.08744079648620552
          vf_explained_var: -0.8176066279411316
          vf_loss: 0.00015401870402153388
    num_agent_steps_sampled: 2243000
    num_agent_steps_trained: 2243000
    num_steps_sampled: 2243000
    num_steps_trained: 2243000
  iterations_since_restore: 2243
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2243,56062.9,2243000,0,0,0,380.02


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2244000
  custom_metrics: {}
  date: 2021-10-09_13-59-36
  done: false
  episode_len_mean: 380.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6267
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.6367765625317892
          entropy_coeff: 0.009999999999999998
          kl: 0.007428295072259772
          policy_loss: -0.08510550076348913
          total_loss: -0.0910161926307612
          vf_explained_var: -0.5209146738052368
          vf_loss: 1.2798490449010084e-05
    num_agent_steps_sampled: 2244000
    num_agent_steps_trained: 2244000
    num_steps_sampled: 2244000
    num_steps_trained: 2244000
  iterations_since_restore: 2244
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2244,56086.3,2244000,0,0,0,380.77




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2245000
  custom_metrics: {}
  date: 2021-10-09_14-00-15
  done: false
  episode_len_mean: 378.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6270
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8654986474249098
          entropy_coeff: 0.009999999999999998
          kl: 0.009849620918517448
          policy_loss: -0.06442684253884687
          total_loss: -0.06922127078804705
          vf_explained_var: -0.6499230861663818
          vf_loss: 1.1869202671732637e-05
    num_agent_steps_sampled: 2245000
    num_agent_steps_trained: 2245000
    num_steps_sampled: 2245000
    num_steps_trained: 2245000
  iterations_since_restore: 2245
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2245,56125.9,2245000,0,0,0,378.01


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2246000
  custom_metrics: {}
  date: 2021-10-09_14-00-37
  done: false
  episode_len_mean: 377.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6273
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8063731882307263
          entropy_coeff: 0.009999999999999998
          kl: 0.008580647483535467
          policy_loss: -0.07044761218130588
          total_loss: -0.07643926913539568
          vf_explained_var: -0.7302455306053162
          vf_loss: 7.578211986785593e-06
    num_agent_steps_sampled: 2246000
    num_agent_steps_trained: 2246000
    num_steps_sampled: 2246000
    num_steps_trained: 2246000
  iterations_since_restore: 2246
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2246,56147.3,2246000,0,0,0,377.67


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2247000
  custom_metrics: {}
  date: 2021-10-09_14-01-01
  done: false
  episode_len_mean: 376.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6276
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.6707336372799344
          entropy_coeff: 0.009999999999999998
          kl: 0.006919721653684184
          policy_loss: -0.004134282593925794
          total_loss: -0.01108057198839055
          vf_explained_var: -1.0
          vf_loss: 3.183141703225879e-05
    num_agent_steps_sampled: 2247000
    num_agent_steps_trained: 2247000
    num_steps_sampled: 2247000
    num_steps_trained: 2247000
  iterations_since_restore: 2247
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2247,56172.1,2247000,0,0,0,376.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2248000
  custom_metrics: {}
  date: 2021-10-09_14-01-24
  done: false
  episode_len_mean: 377.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6278
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8118711233139038
          entropy_coeff: 0.009999999999999998
          kl: 0.010437118452308604
          policy_loss: -0.1006677571270201
          total_loss: -0.10408751567204794
          vf_explained_var: -0.5833104252815247
          vf_loss: 2.423527146978207e-05
    num_agent_steps_sampled: 2248000
    num_agent_steps_trained: 2248000
    num_steps_sampled: 2248000
    num_steps_trained: 2248000
  iterations_since_restore: 2248
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2248,56194.4,2248000,0,0,0,377


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2249000
  custom_metrics: {}
  date: 2021-10-09_14-01-44
  done: false
  episode_len_mean: 377.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6281
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8167284078068204
          entropy_coeff: 0.009999999999999998
          kl: 0.007101877595263457
          policy_loss: -0.07159368679341342
          total_loss: -0.07973253884249264
          vf_explained_var: -0.8082687258720398
          vf_loss: 4.310266212996794e-05
    num_agent_steps_sampled: 2249000
    num_agent_steps_trained: 2249000
    num_steps_sampled: 2249000
    num_steps_trained: 2249000
  iterations_since_restore: 2249
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2249,56215.1,2249000,0,0,0,377.82


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2250000
  custom_metrics: {}
  date: 2021-10-09_14-02-07
  done: false
  episode_len_mean: 378.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6283
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.714948915110694
          entropy_coeff: 0.009999999999999998
          kl: 0.008162670714412151
          policy_loss: -0.06401249793254667
          total_loss: -0.06966791780044636
          vf_explained_var: -0.2982684373855591
          vf_loss: 1.725052559575286e-05
    num_agent_steps_sampled: 2250000
    num_agent_steps_trained: 2250000
    num_steps_sampled: 2250000
    num_steps_trained: 2250000
  iterations_since_restore: 2250
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2250,56237.5,2250000,0,0,0,378.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2251000
  custom_metrics: {}
  date: 2021-10-09_14-02-27
  done: false
  episode_len_mean: 377.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6286
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8277084443304275
          entropy_coeff: 0.009999999999999998
          kl: 0.007926648076904142
          policy_loss: -0.09277316467422578
          total_loss: -0.09985281740211778
          vf_explained_var: -0.9462894797325134
          vf_loss: 5.246529772193753e-05
    num_agent_steps_sampled: 2251000
    num_agent_steps_trained: 2251000
    num_steps_sampled: 2251000
    num_steps_trained: 2251000
  iterations_since_restore: 2251
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2251,56257.9,2251000,0,0,0,377.66


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2252000
  custom_metrics: {}
  date: 2021-10-09_14-02-51
  done: false
  episode_len_mean: 376.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6288
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8899274561140271
          entropy_coeff: 0.009999999999999998
          kl: 0.010551163532492917
          policy_loss: -0.10234002740018898
          total_loss: -0.10638637894557582
          vf_explained_var: -0.36923184990882874
          vf_loss: 1.7853106823571984e-05
    num_agent_steps_sampled: 2252000
    num_agent_steps_trained: 2252000
    num_steps_sampled: 2252000
    num_steps_trained: 2252000
  iterations_since_restore: 2252
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2252,56281.8,2252000,0,0,0,376.57


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2253000
  custom_metrics: {}
  date: 2021-10-09_14-03-13
  done: false
  episode_len_mean: 375.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6291
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8420944703949822
          entropy_coeff: 0.009999999999999998
          kl: 0.009179900198971028
          policy_loss: -0.09774579761756791
          total_loss: -0.10322860133730703
          vf_explained_var: -0.8978180885314941
          vf_loss: 3.1086946546565744e-05
    num_agent_steps_sampled: 2253000
    num_agent_steps_trained: 2253000
    num_steps_sampled: 2253000
    num_steps_trained: 2253000
  iterations_since_restore: 2253
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2253,56304.1,2253000,0,0,0,375.65


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2254000
  custom_metrics: {}
  date: 2021-10-09_14-03-37
  done: false
  episode_len_mean: 373.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6294
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8508869965871175
          entropy_coeff: 0.009999999999999998
          kl: 0.010673415119829974
          policy_loss: -0.16834695155007973
          total_loss: -0.17182520983947647
          vf_explained_var: -0.22073043882846832
          vf_loss: 2.365687932473924e-05
    num_agent_steps_sampled: 2254000
    num_agent_steps_trained: 2254000
    num_steps_sampled: 2254000
    num_steps_trained: 2254000
  iterations_since_restore: 2254
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2254,56327.5,2254000,0,0,0,373.53


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2255000
  custom_metrics: {}
  date: 2021-10-09_14-04-01
  done: false
  episode_len_mean: 372.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6297
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.87867607805464
          entropy_coeff: 0.009999999999999998
          kl: 0.008205755545362653
          policy_loss: -0.0975700098193354
          total_loss: -0.10480752769443724
          vf_explained_var: -0.3010036051273346
          vf_loss: 1.1848353652668367e-05
    num_agent_steps_sampled: 2255000
    num_agent_steps_trained: 2255000
    num_steps_sampled: 2255000
    num_steps_trained: 2255000
  iterations_since_restore: 2255
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2255,56351.3,2255000,0,0,0,372.76




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2256000
  custom_metrics: {}
  date: 2021-10-09_14-04-46
  done: false
  episode_len_mean: 368.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6300
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.5631233281559413
          entropy_coeff: 0.009999999999999998
          kl: 0.008154623427978002
          policy_loss: -0.06745905834767553
          total_loss: -0.07130530215799809
          vf_explained_var: -0.46795666217803955
          vf_loss: 0.0003194849593910476
    num_agent_steps_sampled: 2256000
    num_agent_steps_trained: 2256000
    num_steps_sampled: 2256000
    num_steps_trained: 2256000
  iterations_since_restore: 2256
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2256,56396.1,2256000,0,0,0,368.16


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2257000
  custom_metrics: {}
  date: 2021-10-09_14-05-06
  done: false
  episode_len_mean: 367.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6302
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7745965282122294
          entropy_coeff: 0.009999999999999998
          kl: 0.00849943959951393
          policy_loss: -0.06263545321093665
          total_loss: -0.06837875880300999
          vf_explained_var: -0.4489000141620636
          vf_loss: 5.2341118625539214e-05
    num_agent_steps_sampled: 2257000
    num_agent_steps_trained: 2257000
    num_steps_sampled: 2257000
    num_steps_trained: 2257000
  iterations_since_restore: 2257
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2257,56416.6,2257000,0,0,0,367.1


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2258000
  custom_metrics: {}
  date: 2021-10-09_14-05-32
  done: false
  episode_len_mean: 366.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6305
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.6427990595499675
          entropy_coeff: 0.009999999999999998
          kl: 0.009087478558300097
          policy_loss: -0.15031707121266258
          total_loss: -0.15394722873138056
          vf_explained_var: 0.4217391908168793
          vf_loss: 2.072348175311668e-05
    num_agent_steps_sampled: 2258000
    num_agent_steps_trained: 2258000
    num_steps_sampled: 2258000
    num_steps_trained: 2258000
  iterations_since_restore: 2258
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2258,56442.9,2258000,0,0,0,366.27


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2259000
  custom_metrics: {}
  date: 2021-10-09_14-05-55
  done: false
  episode_len_mean: 366.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6308
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8463080353207058
          entropy_coeff: 0.009999999999999998
          kl: 0.0099653507766083
          policy_loss: -0.07353064459231165
          total_loss: -0.07796283778217104
          vf_explained_var: -0.5653119683265686
          vf_loss: 1.947909445839792e-05
    num_agent_steps_sampled: 2259000
    num_agent_steps_trained: 2259000
    num_steps_sampled: 2259000
    num_steps_trained: 2259000
  iterations_since_restore: 2259
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2259,56465.1,2259000,0,0,0,366.3


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2260000
  custom_metrics: {}
  date: 2021-10-09_14-06-15
  done: false
  episode_len_mean: 367.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6310
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.6718483606974284
          entropy_coeff: 0.009999999999999998
          kl: 0.008566020243010236
          policy_loss: 0.0393687070450849
          total_loss: 0.03471669976909955
          vf_explained_var: -0.405783087015152
          vf_loss: 2.2544431218799824e-05
    num_agent_steps_sampled: 2260000
    num_agent_steps_trained: 2260000
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
  iterations_since_restore: 2260
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2260,56485.1,2260000,0,0,0,367.28


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2261000
  custom_metrics: {}
  date: 2021-10-09_14-06-39
  done: false
  episode_len_mean: 369.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6313
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8148566563924153
          entropy_coeff: 0.009999999999999998
          kl: 0.011387931352313332
          policy_loss: -0.10150761020680268
          total_loss: -0.10361958783533838
          vf_explained_var: -0.597983717918396
          vf_loss: 2.5014557458133723e-05
    num_agent_steps_sampled: 2261000
    num_agent_steps_trained: 2261000
    num_steps_sampled: 2261000
    num_steps_trained: 2261000
  iterations_since_restore: 2261
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2261,56509.1,2261000,0,0,0,369.89


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2262000
  custom_metrics: {}
  date: 2021-10-09_14-07-02
  done: false
  episode_len_mean: 370.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6316
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.865251863002777
          entropy_coeff: 0.009999999999999998
          kl: 0.008524083154706198
          policy_loss: -0.05283793082667722
          total_loss: -0.059488881865723264
          vf_explained_var: -0.8306257724761963
          vf_loss: 1.6599189878737284e-05
    num_agent_steps_sampled: 2262000
    num_agent_steps_trained: 2262000
    num_steps_sampled: 2262000
    num_steps_trained: 2262000
  iterations_since_restore: 2262
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2262,56532.4,2262000,0,0,0,370


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2263000
  custom_metrics: {}
  date: 2021-10-09_14-07-27
  done: false
  episode_len_mean: 368.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6319
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8272280851999918
          entropy_coeff: 0.009999999999999998
          kl: 0.009775882301177161
          policy_loss: -0.005818870787819227
          total_loss: -0.010310092071692149
          vf_explained_var: -0.9684399366378784
          vf_loss: 3.604629894956209e-05
    num_agent_steps_sampled: 2263000
    num_agent_steps_trained: 2263000
    num_steps_sampled: 2263000
    num_steps_trained: 2263000
  iterations_since_restore: 2263
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2263,56557.2,2263000,0,0,0,368.91


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2264000
  custom_metrics: {}
  date: 2021-10-09_14-07-51
  done: false
  episode_len_mean: 369.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6322
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.7891106208165486
          entropy_coeff: 0.009999999999999998
          kl: 0.010520618161027744
          policy_loss: -0.0728285350319412
          total_loss: -0.07590875273777378
          vf_explained_var: -0.7307590246200562
          vf_loss: 1.876878474023316e-05
    num_agent_steps_sampled: 2264000
    num_agent_steps_trained: 2264000
    num_steps_sampled: 2264000
    num_steps_trained: 2264000
  iterations_since_restore: 2264
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2264,56581.8,2264000,0,0,0,369.69


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2265000
  custom_metrics: {}
  date: 2021-10-09_14-08-14
  done: false
  episode_len_mean: 369.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6324
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8634067760573494
          entropy_coeff: 0.009999999999999998
          kl: 0.010759923900854713
          policy_loss: -0.07633849254084958
          total_loss: -0.07983341341217359
          vf_explained_var: -0.6860390305519104
          vf_loss: 1.0557759146144639e-05
    num_agent_steps_sampled: 2265000
    num_agent_steps_trained: 2265000
    num_steps_sampled: 2265000
    num_steps_trained: 2265000
  iterations_since_restore: 2265
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2265,56603.9,2265000,0,0,0,369.29


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2266000
  custom_metrics: {}
  date: 2021-10-09_14-08-37
  done: false
  episode_len_mean: 367.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6327
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.6809401763810052
          entropy_coeff: 0.009999999999999998
          kl: 0.007678629555088179
          policy_loss: -0.05057104809416665
          total_loss: -0.05650867683192094
          vf_explained_var: -0.3254609704017639
          vf_loss: 7.552118143697348e-05
    num_agent_steps_sampled: 2266000
    num_agent_steps_trained: 2266000
    num_steps_sampled: 2266000
    num_steps_trained: 2266000
  iterations_since_restore: 2266
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2266,56627.3,2266000,0,0,0,367.64




Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2267000
  custom_metrics: {}
  date: 2021-10-09_14-09-22
  done: false
  episode_len_mean: 366.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6330
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.5271601412031386
          entropy_coeff: 0.009999999999999998
          kl: 0.00994839814612855
          policy_loss: -0.016706375032663347
          total_loss: -0.017970605856842466
          vf_explained_var: -0.30679625272750854
          vf_loss: 1.9799254808579765e-05
    num_agent_steps_sampled: 2267000
    num_agent_steps_trained: 2267000
    num_steps_sampled: 2267000
    num_steps_trained: 2267000
  iterations_since_restore: 2267


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2267,56672.7,2267000,0,0,0,366.41


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2268000
  custom_metrics: {}
  date: 2021-10-09_14-09-49
  done: false
  episode_len_mean: 364.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 6334
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.6974533107545642
          entropy_coeff: 0.009999999999999998
          kl: 0.0073814462807389935
          policy_loss: -0.06324908547103406
          total_loss: -0.06981407509495814
          vf_explained_var: 0.0010850760154426098
          vf_loss: 3.1138829935015466e-05
    num_agent_steps_sampled: 2268000
    num_agent_steps_trained: 2268000
    num_steps_sampled: 2268000
    num_steps_trained: 2268000
  iterations_since_restore: 2268

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2268,56698.9,2268000,0,0,0,364.23


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2269000
  custom_metrics: {}
  date: 2021-10-09_14-10-11
  done: false
  episode_len_mean: 362.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 6336
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8225307226181031
          entropy_coeff: 0.009999999999999998
          kl: 0.009138194326383124
          policy_loss: -0.08174182960970534
          total_loss: -0.08709399743626516
          vf_explained_var: -0.4979848563671112
          vf_loss: 2.4723173954852327e-05
    num_agent_steps_sampled: 2269000
    num_agent_steps_trained: 2269000
    num_steps_sampled: 2269000
    num_steps_trained: 2269000
  iterations_since_restore: 2269
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2269,56721.2,2269000,0,0,0,362.8


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2270000
  custom_metrics: {}
  date: 2021-10-09_14-10-36
  done: false
  episode_len_mean: 363.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6339
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8526235275798373
          entropy_coeff: 0.009999999999999998
          kl: 0.005698106654839129
          policy_loss: -0.05359576565937863
          total_loss: -0.06396399262464708
          vf_explained_var: -0.5291381478309631
          vf_loss: 0.00014639611999882617
    num_agent_steps_sampled: 2270000
    num_agent_steps_trained: 2270000
    num_steps_sampled: 2270000
    num_steps_trained: 2270000
  iterations_since_restore: 2270
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2270,56746.6,2270000,0,0,0,363.17


Result for PPO_my_env_6c5b8_00000:
  agent_timesteps_total: 2271000
  custom_metrics: {}
  date: 2021-10-09_14-11-00
  done: false
  episode_len_mean: 363.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 6342
  experiment_id: fec3e22c8569417c9ec22976fe85ed14
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.40601251232656
          cur_lr: 5.000000000000001e-05
          entropy: 1.812007607354058
          entropy_coeff: 0.009999999999999998
          kl: 0.005179332406223638
          policy_loss: 0.08376175527357393
          total_loss: 0.07300720279001527
          vf_explained_var: -0.1040421724319458
          vf_loss: 8.331598502182815e-05
    num_agent_steps_sampled: 2271000
    num_agent_steps_trained: 2271000
    num_steps_sampled: 2271000
    num_steps_trained: 2271000
  iterations_since_restore: 2271
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6c5b8_00000,RUNNING,192.168.3.5:40112,2271,56770.3,2271000,0,0,0,363.06
