In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that flattens its final feature map.

    The network is six ``Conv2d`` layers (kernel 2, stride 2, no padding),
    each followed by ``ELU``, and a closing ``Flatten``. Every convolution
    halves the spatial height and width, and the channel count goes
    3 -> 32 -> 32 -> 64 -> 128 -> 256 -> 512.

    The flattened output therefore has ``512 * (H // 64) * (W // 64)``
    features per sample, e.g. 512 features for a 64x64 input.
    """

    def __init__(self):
        super().__init__()

        # Channel width before the first conv and after each of the six convs.
        widths = (3, 32, 32, 64, 128, 256, 512)

        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages.append(nn.Conv2d(c_in, c_out, kernel_size=2, stride=2, padding=0))
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # Kept as a single Sequential named ``cnn`` so parameter names stay
        # ``cnn.<index>.weight`` / ``cnn.<index>.bias`` for saved state dicts.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Encode a channels-first image batch ``x`` into flat feature vectors."""
        encoded = self.cnn(x)
        return encoded

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib custom Torch model: pretrained ``VisualEncoder`` plus linear heads.

    The encoder weights are loaded from ``ENCODER_WEIGHTS_PATH`` at
    construction time. Two linear layers sit on top of the encoder features:
    ``action_head`` emits one score per discrete action
    (``action_space.n``) and ``value_head`` emits a scalar state value.

    The heads expect 512 input features, which matches the encoder output
    only when its final feature map is 1x1 (e.g. 64x64 input images).
    NOTE(review): confirm the environment's observation size.
    """

    # Checkpoint containing the pretrained encoder ``state_dict``.
    # NOTE(review): absolute, machine-specific path; file name spelling
    # ("weigths") is preserved because it must match the file on disk.
    ENCODER_WEIGHTS_PATH = "/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth"

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder and heads; arguments are forwarded to ``TorchModelV2``.

        ``action_space`` must expose ``.n`` (it is used to size the policy head).
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512
        self.encoder = VisualEncoder()
        # Load onto CPU first so the checkpoint can be read regardless of the
        # device it was saved from.
        self.encoder.load_state_dict(
            torch.load(self.ENCODER_WEIGHTS_PATH, map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Value estimate cached by forward() for value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action scores for a batch and cache the value estimate.

        Args:
            input_dict: Mapping whose ``'obs'`` entry is a 4-D tensor that is
                permuted from channels-last to channels-first and divided by
                255 (presumably 0-255 pixel values -- confirm against the env).
            state: Passed through unchanged.
            seq_lens: Unused.

        Returns:
            ``(scores, state)`` where ``scores`` has one column per action.
        """
        # Tensor.cuda()/.to() return a new tensor instead of moving in place;
        # the original bare ``obs.cuda()`` call therefore had no effect.
        # Follow the encoder's actual device so this stays correct even if
        # the framework relocates the model after construction.
        device = next(self.encoder.parameters()).device
        obs = input_dict['obs'].to(device).permute(0, 3, 1, 2).float() / 255.0

        features = self.encoder(obs)
        logits = self.action_head(features)
        # value_head yields (batch, 1); drop the trailing axis -> (batch,).
        self.last_value = self.value_head(features).squeeze(1)
        return logits, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the most recent ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass with RLlib's model catalog under the key
# "my_torch_model"; the trainer config refers to it via model["custom_model"].
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
# Pin GPU selection through environment variables: device indices follow PCI
# bus order and only device "0" is made visible.
# NOTE(review): these take effect only if set before CUDA is initialised;
# torch is imported in an earlier cell -- confirm no CUDA call happens before
# this cell runs.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

def env_creator(env_config):
    """Create the wrapped ``IGLUSilentBuilder-v0`` environment.

    The base environment is built with ``gym.make``, given a task set, and then
    wrapped (in this order) by ``PovOnlyWrapper``, ``SelectAndPlace`` and
    ``Discretization`` over ``flat_action_space('human-level')``.

    Args:
        env_config: Optional mapping of overrides (may be empty or ``None``).
            Recognised keys:

            * ``"max_steps"`` -- forwarded to ``gym.make`` (default ``1000``).
            * ``"task_preset"`` -- list passed as ``TaskSet(preset=...)``
              (default ``['C32']``).

    Returns:
        The fully wrapped environment.
    """
    # Previously env_config was ignored; defaults reproduce the old behaviour.
    settings = env_config or {}
    max_steps = settings.get('max_steps', 1000)
    task_preset = settings.get('task_preset', ['C32'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=list(task_preset)))
    # Wrapper order is preserved from the original; each wrapper comes from
    # the project's star imports (semantics not visible here).
    env = PovOnlyWrapper(env)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    return env

from ray.tune.registry import register_env
# Expose env_creator to Tune/RLlib under the name "my_env", which is the value
# used for the "env" key in the trainer config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Weights & Biases run metadata, placed under logger_config["wandb"].
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO C32 pretrained (AngelaCNN) (3 noops after placement)",
}

# Select the custom model registered as "my_torch_model"; no extra
# constructor kwargs are supplied.
model_settings = {
    "custom_model": "my_torch_model",
    "custom_model_config": {},
}

# PPO trainer configuration: registered env "my_env", torch framework,
# one GPU for the trainer and a single rollout worker.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# Launch training. Note that no `stop` criterion is passed to tune.run here.
analysis = tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])



Trial name,status,loc
PPO_my_env_c4eb6_00000,PENDING,


2021-10-08 15:09:44,567	INFO wandb.py:170 -- Already logged into W&B.
2021-10-08 15:09:44,578	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=213)[0m 2021-10-08 15:09:48,089	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=213)[0m 2021-10-08 15:09:48,090	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=213)[0m 2021-10-08 15:09:56,791	INFO trainable.py:109 -- Trainable.setup took 11.199 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-08_15-11-02
  done: false
  episode_len_mean: 423.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.75391415754954
          entropy_coeff: 0.009999999999999998
          kl: 0.013047522511651602
          policy_loss: -0.07453308064076636
          total_loss: -0.09316770566834344
          vf_explained_var: 0.47365933656692505
          vf_loss: 0.0062950101241262425
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,1,66.1091,1000,0,0,0,423


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-08_15-11-22
  done: false
  episode_len_mean: 421.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.722431707382202
          entropy_coeff: 0.009999999999999998
          kl: 0.012597103227755133
          policy_loss: -0.1291792506352067
          total_loss: -0.14846383455312914
          vf_explained_var: 0.09331028163433075
          vf_loss: 0.005420312057766649
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,2,86.0303,2000,0,0,0,421.75


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-08_15-11-41
  done: false
  episode_len_mean: 421.85714285714283
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 7
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7138678948084514
          entropy_coeff: 0.009999999999999998
          kl: 0.013443993380699442
          policy_loss: -0.07384034018549654
          total_loss: -0.09373285575873322
          vf_explained_var: 0.030553756281733513
          vf_loss: 0.004557364018789182
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,3,104.838,3000,0,0,0,421.857


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-08_15-12-01
  done: false
  episode_len_mean: 421.8888888888889
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6943951580259533
          entropy_coeff: 0.009999999999999998
          kl: 0.010635788737023042
          policy_loss: -0.04061764763254258
          total_loss: -0.0622682001027796
          vf_explained_var: -0.05338457599282265
          vf_loss: 0.0031662413358895317
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,4,124.364,4000,0,0,0,421.889


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-08_15-12-21
  done: false
  episode_len_mean: 419.90909090909093
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 11
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.688299865192837
          entropy_coeff: 0.009999999999999998
          kl: 0.01250895821218485
          policy_loss: -0.14078282540043194
          total_loss: -0.16361454207864073
          vf_explained_var: 0.19691674411296844
          vf_loss: 0.0015494911666286903
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,5,144.544,5000,0,0,0,419.909


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-08_15-12-42
  done: false
  episode_len_mean: 418.2857142857143
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 14
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6581474171744452
          entropy_coeff: 0.009999999999999998
          kl: 0.012614581941380099
          policy_loss: -0.06924359343118137
          total_loss: -0.09107043080859714
          vf_explained_var: -0.1405741423368454
          vf_loss: 0.0022317237738105986
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,6,165.467,6000,0,0,0,418.286


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-08_15-13-02
  done: false
  episode_len_mean: 416.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 16
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.671984084447225
          entropy_coeff: 0.009999999999999998
          kl: 0.013677547312679117
          policy_loss: -0.14313411919607055
          total_loss: -0.16445394007282124
          vf_explained_var: -0.8282077312469482
          vf_loss: 0.0026645082217227256
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,7,185.925,7000,0,0,0,416.75


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-08_15-13-23
  done: false
  episode_len_mean: 416.8421052631579
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 19
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6481262975268893
          entropy_coeff: 0.009999999999999998
          kl: 0.010112771823538867
          policy_loss: -0.16511741479237874
          total_loss: -0.1880350376168887
          vf_explained_var: 0.5416609644889832
          vf_loss: 0.0015410831476199544
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,8,206.956,8000,0,0,0,416.842


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-08_15-13-44
  done: false
  episode_len_mean: 414.6190476190476
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 21
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.580951380729675
          entropy_coeff: 0.009999999999999998
          kl: 0.012326405196749985
          policy_loss: -0.0644924667560392
          total_loss: -0.08591784851418602
          vf_explained_var: -0.17501646280288696
          vf_loss: 0.0019188482676529223
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,9,227.561,9000,0,0,0,414.619


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-08_15-14-05
  done: false
  episode_len_mean: 415.125
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 24
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.550762078497145
          entropy_coeff: 0.009999999999999998
          kl: 0.013565588826011535
          policy_loss: -0.09027705776194732
          total_loss: -0.1118733826196856
          vf_explained_var: 0.02386361174285412
          vf_loss: 0.001198175043423867
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,10,248.394,10000,0,0,0,415.125


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-08_15-14-25
  done: false
  episode_len_mean: 416.5769230769231
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 26
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5972038480970596
          entropy_coeff: 0.009999999999999998
          kl: 0.015441008787174247
          policy_loss: -0.05289385300129652
          total_loss: -0.07453854090223709
          vf_explained_var: 0.13316361606121063
          vf_loss: 0.001239146909210831
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,11,268.107,11000,0,0,0,416.577


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-08_15-14-45
  done: false
  episode_len_mean: 417.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 28
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.573284363746643
          entropy_coeff: 0.009999999999999998
          kl: 0.013552451747802912
          policy_loss: -0.08916212798406681
          total_loss: -0.1101001417885224
          vf_explained_var: -0.07213204354047775
          vf_loss: 0.002084337231159831
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,12,288.415,12000,0,0,0,417




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-08_15-15-24
  done: false
  episode_len_mean: 413.5806451612903
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 31
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5825312190585668
          entropy_coeff: 0.009999999999999998
          kl: 0.015120872491253915
          policy_loss: -0.07540992788142628
          total_loss: -0.09704839413364728
          vf_explained_var: 0.25896155834198
          vf_loss: 0.0011626726934789782
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,13,327.101,13000,0,0,0,413.581


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-08_15-15-43
  done: false
  episode_len_mean: 413.75757575757575
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 33
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5822434504826863
          entropy_coeff: 0.009999999999999998
          kl: 0.016389730797699117
          policy_loss: -0.04583663543065389
          total_loss: -0.06711434804730945
          vf_explained_var: 0.47074148058891296
          vf_loss: 0.0012667745849790257
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,14,346.698,14000,0,0,0,413.758


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-08_15-16-04
  done: false
  episode_len_mean: 412.69444444444446
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 36
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4996821085611978
          entropy_coeff: 0.009999999999999998
          kl: 0.01412253563520184
          policy_loss: -0.05924334828224447
          total_loss: -0.08066859489513768
          vf_explained_var: -0.050842709839344025
          vf_loss: 0.000747067631325788
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,15,366.967,15000,0,0,0,412.694


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-08_15-16-25
  done: false
  episode_len_mean: 411.10526315789474
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 38
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6022141853968304
          entropy_coeff: 0.009999999999999998
          kl: 0.01320600625556361
          policy_loss: -0.0674698702370127
          total_loss: -0.08965001023477978
          vf_explained_var: -0.44220301508903503
          vf_loss: 0.001200799625237576
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,16,388.158,16000,0,0,0,411.105


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-08_15-16-46
  done: false
  episode_len_mean: 410.3414634146341
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 41
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.531606266233656
          entropy_coeff: 0.009999999999999998
          kl: 0.01709730324160669
          policy_loss: -0.03537093504435486
          total_loss: -0.0560199450287554
          vf_explained_var: 0.5201541185379028
          vf_loss: 0.0012475915000929186
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore: 17
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,17,409.137,17000,0,0,0,410.341


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-08_15-17-08
  done: false
  episode_len_mean: 409.02272727272725
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 44
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5008445262908934
          entropy_coeff: 0.009999999999999998
          kl: 0.020221017541326484
          policy_loss: -0.05088463106917011
          total_loss: -0.07046633486946424
          vf_explained_var: -0.19587230682373047
          vf_loss: 0.0013825382190083878
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,18,431.097,18000,0,0,0,409.023


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-08_15-17-29
  done: false
  episode_len_mean: 408.60869565217394
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 46
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.527618079715305
          entropy_coeff: 0.009999999999999998
          kl: 0.012347411758027308
          policy_loss: -0.10336225032806397
          total_loss: -0.12389992165068785
          vf_explained_var: -0.44667497277259827
          vf_loss: 0.0010342858696175325
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,19,452.496,19000,0,0,0,408.609


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-08_15-17-50
  done: false
  episode_len_mean: 408.9375
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 48
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.526723692152235
          entropy_coeff: 0.009999999999999998
          kl: 0.01514803273777775
          policy_loss: -0.13824496418237686
          total_loss: -0.15782277348140875
          vf_explained_var: -0.1366717368364334
          vf_loss: 0.001145014902835505
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 192.168.3.5
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,20,472.82,20000,0,0,0,408.938


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-08_15-18-10
  done: false
  episode_len_mean: 409.3529411764706
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 51
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.5117613236109415
          entropy_coeff: 0.009999999999999998
          kl: 0.014137051701775663
          policy_loss: -0.10581533573567867
          total_loss: -0.12564332907398543
          vf_explained_var: -0.48422786593437195
          vf_loss: 0.0010485045459871698
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,21,493.065,21000,0,0,0,409.353


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-08_15-18-31
  done: false
  episode_len_mean: 408.9433962264151
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 53
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.499843732515971
          entropy_coeff: 0.009999999999999998
          kl: 0.015970859881195665
          policy_loss: -0.05271011789639791
          total_loss: -0.07166555143064923
          vf_explained_var: -0.44425562024116516
          vf_loss: 0.0012517452750924146
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,22,514.184,22000,0,0,0,408.943


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-08_15-18-51
  done: false
  episode_len_mean: 409.55357142857144
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 56
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.496037377251519
          entropy_coeff: 0.009999999999999998
          kl: 0.016305639645904424
          policy_loss: -0.05138727186454667
          total_loss: -0.07069598121775521
          vf_explained_var: 0.24479635059833527
          vf_loss: 0.0007599728744632254
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,23,534.092,23000,0,0,0,409.554


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-08_15-19-11
  done: false
  episode_len_mean: 409.44827586206895
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 58
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.490700374709235
          entropy_coeff: 0.009999999999999998
          kl: 0.017111341490425057
          policy_loss: -0.11612036546899213
          total_loss: -0.13448448210126823
          vf_explained_var: -0.8529549241065979
          vf_loss: 0.0014094861993928336
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,24,554.497,24000,0,0,0,409.448




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-08_15-19-49
  done: false
  episode_len_mean: 409.2295081967213
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 61
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.5051515950096976
          entropy_coeff: 0.009999999999999998
          kl: 0.014181628162442787
          policy_loss: -0.06198268733504746
          total_loss: -0.08180220321648651
          vf_explained_var: -0.7264434099197388
          vf_loss: 0.0009775098400293953
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,25,592.092,25000,0,0,0,409.23


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-08_15-20-09
  done: false
  episode_len_mean: 409.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 63
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.5070088121626113
          entropy_coeff: 0.009999999999999998
          kl: 0.013922682369666046
          policy_loss: -0.03200952232711845
          total_loss: -0.05182940618445476
          vf_explained_var: -0.6755536198616028
          vf_loss: 0.0010733980316823969
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26
  node_ip: 192.168.3.5
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,26,612.542,26000,0,0,0,409


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-08_15-20-27
  done: false
  episode_len_mean: 409.44615384615383
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 65
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.5491653150982327
          entropy_coeff: 0.009999999999999998
          kl: 0.015705198745140456
          policy_loss: -0.14624242871585819
          total_loss: -0.16599797594050567
          vf_explained_var: 0.1819779872894287
          vf_loss: 0.0010245468367227457
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,27,630.342,27000,0,0,0,409.446


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-08_15-20-49
  done: false
  episode_len_mean: 409.80882352941177
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 68
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.4335214720831977
          entropy_coeff: 0.009999999999999998
          kl: 0.01435554945148121
          policy_loss: -0.08900729285346137
          total_loss: -0.10770770894984404
          vf_explained_var: -0.29649966955184937
          vf_loss: 0.0013281334136586844
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,28,651.998,28000,0,0,0,409.809


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-08_15-21-12
  done: false
  episode_len_mean: 409.2857142857143
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 70
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.320324198404948
          entropy_coeff: 0.009999999999999998
          kl: 0.016052671316030134
          policy_loss: -0.09594849869608879
          total_loss: -0.11318505133191745
          vf_explained_var: -0.3297210931777954
          vf_loss: 0.0011508864240669127
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,29,675.508,29000,0,0,0,409.286


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-08_15-21-35
  done: false
  episode_len_mean: 408.35616438356163
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 73
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.406226944923401
          entropy_coeff: 0.009999999999999998
          kl: 0.017120386939594615
          policy_loss: -0.06451144990407759
          total_loss: -0.08204127616352505
          vf_explained_var: 0.08716471493244171
          vf_loss: 0.0013963246769789193
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,30,698.014,30000,0,0,0,408.356


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-08_15-21-55
  done: false
  episode_len_mean: 408.81333333333333
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 75
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.4161370780732896
          entropy_coeff: 0.009999999999999998
          kl: 0.018420034195982326
          policy_loss: -0.06447459695239861
          total_loss: -0.08209984882010354
          vf_explained_var: -0.20420069992542267
          vf_loss: 0.001010109418227027
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore: 31
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,31,718.334,31000,0,0,0,408.813


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-08_15-22-16
  done: false
  episode_len_mean: 408.84615384615387
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 78
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3939179182052612
          entropy_coeff: 0.009999999999999998
          kl: 0.019601894289268767
          policy_loss: -0.0728805270873838
          total_loss: -0.08990642817484008
          vf_explained_var: -0.27063682675361633
          vf_loss: 0.0010327076461787025
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,32,739.363,32000,0,0,0,408.846


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-08_15-22-36
  done: false
  episode_len_mean: 409.8625
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 80
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.4101211494869657
          entropy_coeff: 0.009999999999999998
          kl: 0.02066497120656514
          policy_loss: -0.0503890002767245
          total_loss: -0.06774364846448104
          vf_explained_var: -0.2327914834022522
          vf_loss: 0.000547071866134906
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_restore: 33
  node_ip: 192.168.3.5
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,33,759.299,33000,0,0,0,409.863


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-08_15-22-59
  done: false
  episode_len_mean: 408.43373493975906
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 83
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.369122791290283
          entropy_coeff: 0.009999999999999998
          kl: 0.016928257507236293
          policy_loss: -0.03816678722699483
          total_loss: -0.053189370367262095
          vf_explained_var: -0.4777269661426544
          vf_loss: 0.0010509298257400386
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 34
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,34,781.79,34000,0,0,0,408.434


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-08_15-23-24
  done: false
  episode_len_mean: 406.8953488372093
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 86
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.330386514133877
          entropy_coeff: 0.009999999999999998
          kl: 0.015965958276609862
          policy_loss: -0.053087541295422445
          total_loss: -0.06090371939871046
          vf_explained_var: -0.6407071352005005
          vf_loss: 0.008303000228220805
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_restore: 35
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,35,807.119,35000,0,0,0,406.895


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-08_15-23-47
  done: false
  episode_len_mean: 406.1363636363636
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 88
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.390363510449727
          entropy_coeff: 0.009999999999999998
          kl: 0.013219266703856805
          policy_loss: -0.04326977365546757
          total_loss: -0.060575917777087956
          vf_explained_var: 0.41719529032707214
          vf_loss: 0.0006488206634660148
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,36,830.263,36000,0,0,0,406.136




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-08_15-24-27
  done: false
  episode_len_mean: 404.5054945054945
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 91
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3100219700071545
          entropy_coeff: 0.009999999999999998
          kl: 0.013502258608008372
          policy_loss: -0.033584527050455414
          total_loss: -0.0495913174831205
          vf_explained_var: -0.09738624095916748
          vf_loss: 0.0010174130011970798
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 37
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,37,869.51,37000,0,0,0,404.505


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-08_15-24-48
  done: false
  episode_len_mean: 403.6595744680851
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 94
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3939082675509984
          entropy_coeff: 0.009999999999999998
          kl: 0.011228461155507278
          policy_loss: -0.07347914286785656
          total_loss: -0.09115239278309875
          vf_explained_var: -0.4560646414756775
          vf_loss: 0.0012130250779187513
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_restore: 38
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,38,891.139,38000,0,0,0,403.66


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-08_15-25-10
  done: false
  episode_len_mean: 403.375
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 96
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.372447458902995
          entropy_coeff: 0.009999999999999998
          kl: 0.014343710573770175
          policy_loss: -0.01425115644103951
          total_loss: -0.030627796136670642
          vf_explained_var: -0.35565096139907837
          vf_loss: 0.0008931635891915196
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,39,912.953,39000,0,0,0,403.375


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-08_15-25-32
  done: false
  episode_len_mean: 402.2828282828283
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 99
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3204543537563747
          entropy_coeff: 0.009999999999999998
          kl: 0.011664979172819933
          policy_loss: -0.14282531914197735
          total_loss: -0.15993298296299246
          vf_explained_var: -0.4970002770423889
          vf_loss: 0.0008476391373228075
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,40,935.032,40000,0,0,0,402.283


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-08_15-25-53
  done: false
  episode_len_mean: 402.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 101
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.333419911066691
          entropy_coeff: 0.009999999999999998
          kl: 0.011425954459008262
          policy_loss: -0.11984586368004481
          total_loss: -0.13744529452588822
          vf_explained_var: -0.7860745787620544
          vf_loss: 0.0005930860944014664
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,41,955.42,41000,0,0,0,402.63


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-08_15-26-14
  done: false
  episode_len_mean: 401.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 104
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3768936157226563
          entropy_coeff: 0.009999999999999998
          kl: 0.014107737200525172
          policy_loss: -0.054850105237629676
          total_loss: -0.07126599852409628
          vf_explained_var: -0.6091209053993225
          vf_loss: 0.001004562159262908
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,42,977.049,42000,0,0,0,401.13


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-08_15-26-33
  done: false
  episode_len_mean: 400.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 106
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.4671180221769546
          entropy_coeff: 0.009999999999999998
          kl: 0.013185844145071495
          policy_loss: -0.051812904493676294
          total_loss: -0.06979021289282375
          vf_explained_var: -0.3220367729663849
          vf_loss: 0.0007602410045769324
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,43,995.933,43000,0,0,0,400.55


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-08_15-26-58
  done: false
  episode_len_mean: 399.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 109
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.2697643041610718
          entropy_coeff: 0.009999999999999998
          kl: 0.012655906928676448
          policy_loss: -0.07646236109236876
          total_loss: -0.09249729530678855
          vf_explained_var: -0.6756010055541992
          vf_loss: 0.0009675479775372272
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 44
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,44,1020.81,44000,0,0,0,399.06


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-08_15-27-20
  done: false
  episode_len_mean: 397.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 112
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3382974174287585
          entropy_coeff: 0.009999999999999998
          kl: 0.01641288227445303
          policy_loss: -0.034804716871844395
          total_loss: -0.04993203754226367
          vf_explained_var: -0.33705219626426697
          vf_loss: 0.0008698588696360174
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,45,1042.87,45000,0,0,0,397.81


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-08_15-27-40
  done: false
  episode_len_mean: 397.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 115
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3409555938508775
          entropy_coeff: 0.009999999999999998
          kl: 0.01166413812646053
          policy_loss: 0.012006347253918647
          total_loss: -0.005436339974403381
          vf_explained_var: 0.16586747765541077
          vf_loss: 0.0007180061514696313
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 46
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,46,1062.88,46000,0,0,0,397.07


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-08_15-28-02
  done: false
  episode_len_mean: 396.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 117
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3432159105936687
          entropy_coeff: 0.009999999999999998
          kl: 0.012896143475172532
          policy_loss: -0.09237645674082968
          total_loss: -0.10923393910957707
          vf_explained_var: 0.07146064937114716
          vf_loss: 0.0007714100460159696
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,47,1084.45,47000,0,0,0,396.35




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-08_15-28-40
  done: false
  episode_len_mean: 395.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 120
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.323434395260281
          entropy_coeff: 0.009999999999999998
          kl: 0.011133614691469281
          policy_loss: -0.06544215058286985
          total_loss: -0.08320697744687398
          vf_explained_var: -0.7088407278060913
          vf_loss: 0.00045939003919354745
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,48,1122.65,48000,0,0,0,395.58


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-08_15-29-02
  done: false
  episode_len_mean: 395.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 122
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.263989456494649
          entropy_coeff: 0.009999999999999998
          kl: 0.013627313707734463
          policy_loss: -0.0974124585174852
          total_loss: -0.113113080834349
          vf_explained_var: -0.5337077975273132
          vf_loss: 0.000806982902253771
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,49,1144.25,49000,0,0,0,395.53


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-08_15-29-25
  done: false
  episode_len_mean: 393.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 125
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.398398616578844
          entropy_coeff: 0.009999999999999998
          kl: 0.013027490163962756
          policy_loss: -0.13691056836396456
          total_loss: -0.15450175625996457
          vf_explained_var: -0.6091822385787964
          vf_loss: 0.0005304284241800714
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,50,1167.33,50000,0,0,0,393.87


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-08_15-29-45
  done: false
  episode_len_mean: 394.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 127
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.2100488397810194
          entropy_coeff: 0.009999999999999998
          kl: 0.01185597930332533
          policy_loss: -0.039194484593139754
          total_loss: -0.0553234760546022
          vf_explained_var: -0.7969089150428772
          vf_loss: 0.0006363059584853343
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,51,1187.91,51000,0,0,0,394.25


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-08_15-30-09
  done: false
  episode_len_mean: 393.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 130
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3939916928609213
          entropy_coeff: 0.009999999999999998
          kl: 0.011826667609547157
          policy_loss: -0.10348692271444533
          total_loss: -0.12101281310121219
          vf_explained_var: -0.4846968650817871
          vf_loss: 0.0010920226389619832
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,52,1211.45,52000,0,0,0,393.78


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-08_15-30-32
  done: false
  episode_len_mean: 392.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 133
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.39494534863366
          entropy_coeff: 0.009999999999999998
          kl: 0.011723350913281156
          policy_loss: -0.1803598926299148
          total_loss: -0.1981760621070862
          vf_explained_var: -0.3764055371284485
          vf_loss: 0.0008577745921987419
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_restore: 53
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,53,1234.19,53000,0,0,0,392.8


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-08_15-30-54
  done: false
  episode_len_mean: 392.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 135
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3107956409454347
          entropy_coeff: 0.009999999999999998
          kl: 0.014237399555558186
          policy_loss: -0.04601718431545628
          total_loss: -0.06170841790735722
          vf_explained_var: -0.7568357586860657
          vf_loss: 0.0010098924161866308
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 54
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,54,1256.24,54000,0,0,0,392.61


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-08_15-31-16
  done: false
  episode_len_mean: 392.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 138
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3984121057722305
          entropy_coeff: 0.009999999999999998
          kl: 0.011818739374484149
          policy_loss: -0.06210493902779288
          total_loss: -0.08015384199097753
          vf_explained_var: -0.86616051197052
          vf_loss: 0.0006167854787135083
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 55
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,55,1278.51,55000,0,0,0,392.79


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-08_15-31-38
  done: false
  episode_len_mean: 392.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 140
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.34898964299096
          entropy_coeff: 0.009999999999999998
          kl: 0.011370198856243895
          policy_loss: -0.052779334359284905
          total_loss: -0.07059429738049706
          vf_explained_var: -0.9554060101509094
          vf_loss: 0.0005583458602713007
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_restore: 56
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,56,1300.3,56000,0,0,0,392.98


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-08_15-31-59
  done: false
  episode_len_mean: 393.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 143
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.2980007621977063
          entropy_coeff: 0.009999999999999998
          kl: 0.011802948346666989
          policy_loss: -0.02411598474201229
          total_loss: -0.0404235754472514
          vf_explained_var: -0.9746467471122742
          vf_loss: 0.0013610894717405446
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 57
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,57,1321.92,57000,0,0,0,393.54


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-08_15-32-22
  done: false
  episode_len_mean: 392.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 145
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3279359367158676
          entropy_coeff: 0.009999999999999998
          kl: 0.01214146637849846
          policy_loss: -0.07595320112175412
          total_loss: -0.09271491091284487
          vf_explained_var: -0.8897081017494202
          vf_loss: 0.00105398857461599
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,58,1345.01,58000,0,0,0,392.68


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-08_15-32-44
  done: false
  episode_len_mean: 392.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 148
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3881861554251778
          entropy_coeff: 0.009999999999999998
          kl: 0.011877552812318587
          policy_loss: -0.0007701544505026606
          total_loss: -0.018629600604375202
          vf_explained_var: -0.5250116586685181
          vf_loss: 0.0006775164110068646
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,59,1366.63,59000,0,0,0,392.74




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-08_15-33-25
  done: false
  episode_len_mean: 392.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 150
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3332065025965374
          entropy_coeff: 0.009999999999999998
          kl: 0.013630282921526377
          policy_loss: -0.10816716606625253
          total_loss: -0.12444439504502548
          vf_explained_var: -0.5750746130943298
          vf_loss: 0.0009212113399472502
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,60,1407.33,60000,0,0,0,392.75


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-08_15-33-50
  done: false
  episode_len_mean: 390.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 153
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.378480648994446
          entropy_coeff: 0.009999999999999998
          kl: 0.0131592678317066
          policy_loss: -0.09685716595914629
          total_loss: -0.1138841567767991
          vf_explained_var: -0.3870663642883301
          vf_loss: 0.0008361448443287776
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,61,1432.01,61000,0,0,0,390.73


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-08_15-34-13
  done: false
  episode_len_mean: 388.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 156
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.2022084765964083
          entropy_coeff: 0.009999999999999998
          kl: 0.01202232066572513
          policy_loss: -0.07082159970369604
          total_loss: -0.08617038586073451
          vf_explained_var: -0.8894367814064026
          vf_loss: 0.001263255022543793
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_restore: 62
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,62,1455.68,62000,0,0,0,388.94


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-08_15-34-36
  done: false
  episode_len_mean: 387.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 159
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.311572480201721
          entropy_coeff: 0.009999999999999998
          kl: 0.011759842562029811
          policy_loss: -0.07458106830923093
          total_loss: -0.09120743368855781
          vf_explained_var: -1.0
          vf_loss: 0.0011974291785413192
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 63
  node_ip: 192.168.3.5
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,63,1478.27,63000,0,0,0,387.8


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-08_15-34-59
  done: false
  episode_len_mean: 387.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 161
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.2852359188927545
          entropy_coeff: 0.009999999999999998
          kl: 0.013150578461375101
          policy_loss: -0.11313517118493716
          total_loss: -0.12878392247690096
          vf_explained_var: -0.7102711200714111
          vf_loss: 0.0012858461582153622
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,64,1501.53,64000,0,0,0,387.27


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-08_15-35-25
  done: false
  episode_len_mean: 385.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 164
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.118957993719313
          entropy_coeff: 0.009999999999999998
          kl: 0.012172124469002357
          policy_loss: -0.029587843517462412
          total_loss: -0.04451901134517458
          vf_explained_var: -0.7644549012184143
          vf_loss: 0.0007809563730714014
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_restore: 65
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,65,1527.2,65000,0,0,0,385.36


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-08_15-35-48
  done: false
  episode_len_mean: 383.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 167
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.031539367304908
          entropy_coeff: 0.009999999999999998
          kl: 0.015185017893906898
          policy_loss: -0.04911345371769534
          total_loss: -0.060745519834260145
          vf_explained_var: -0.6623243093490601
          vf_loss: 0.0018500694086671704
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,66,1550.25,66000,0,0,0,383.3


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-08_15-36-12
  done: false
  episode_len_mean: 382.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 170
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.2521740436553954
          entropy_coeff: 0.009999999999999998
          kl: 0.013183085766644669
          policy_loss: -0.08257977523737484
          total_loss: -0.09819849125213093
          vf_explained_var: -0.31043893098831177
          vf_loss: 0.0009706358477059337
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_since_restore: 67
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,67,1574.86,67000,0,0,0,382


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-08_15-36-39
  done: false
  episode_len_mean: 380.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 173
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.129625732368893
          entropy_coeff: 0.009999999999999998
          kl: 0.011651506078355921
          policy_loss: -0.13640111097031168
          total_loss: -0.15172625912560356
          vf_explained_var: -0.6280807852745056
          vf_loss: 0.0007279316910878858
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_since_restore: 68
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,68,1601.34,68000,0,0,0,380.45


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-08_15-37-07
  done: false
  episode_len_mean: 377.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 176
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.1706648561689588
          entropy_coeff: 0.009999999999999998
          kl: 0.012982020398925856
          policy_loss: -0.09711193396813339
          total_loss: -0.11230510688490337
          vf_explained_var: -0.7268186211585999
          vf_loss: 0.0006715627565022765
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_restore: 69
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,69,1629.43,69000,0,0,0,377.13


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-08_15-37-35
  done: false
  episode_len_mean: 374.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 179
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.126349014706082
          entropy_coeff: 0.009999999999999998
          kl: 0.013943297450059452
          policy_loss: -0.015715707176261477
          total_loss: -0.029476052108738156
          vf_explained_var: -0.08452221751213074
          vf_loss: 0.001228659035405144
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 70
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,70,1657.14,70000,0,0,0,374.52




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-08_15-38-17
  done: false
  episode_len_mean: 371.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 182
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.1340115626653033
          entropy_coeff: 0.009999999999999998
          kl: 0.010940129662024727
          policy_loss: 0.012379792808658547
          total_loss: -0.003041872237291601
          vf_explained_var: -0.19989672303199768
          vf_loss: 0.0009953926058693064
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_restore: 71
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,71,1699.38,71000,0,0,0,371.65


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-08_15-38-42
  done: false
  episode_len_mean: 371.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 185
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.151399318377177
          entropy_coeff: 0.009999999999999998
          kl: 0.009824011260514463
          policy_loss: -0.15485681220889091
          total_loss: -0.1712997228735023
          vf_explained_var: -0.07219359278678894
          vf_loss: 0.0006502786263202627
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_restore: 72
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,72,1724.19,72000,0,0,0,371.35


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-08_15-39-09
  done: false
  episode_len_mean: 370.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 188
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.1012557877434626
          entropy_coeff: 0.009999999999999998
          kl: 0.01506225215230423
          policy_loss: -0.11601667354504268
          total_loss: -0.12921479274001385
          vf_explained_var: -0.3720015585422516
          vf_loss: 0.0010364265531987056
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_restore: 73
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,73,1751.33,73000,0,0,0,370.94


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-08_15-39-35
  done: false
  episode_len_mean: 370.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 191
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.213610304726495
          entropy_coeff: 0.009999999999999998
          kl: 0.013198804036008996
          policy_loss: -0.06424454036686156
          total_loss: -0.0792712953976459
          vf_explained_var: -0.8533276319503784
          vf_loss: 0.0011698841780243027
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,74,1777.7,74000,0,0,0,370.28


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-08_15-40-03
  done: false
  episode_len_mean: 368.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 194
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.099246528413561
          entropy_coeff: 0.009999999999999998
          kl: 0.009285908846312729
          policy_loss: 0.0030442372585336368
          total_loss: -0.013296854371825853
          vf_explained_var: -0.5408296585083008
          vf_loss: 0.00047271388094587667
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_restore: 75
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,75,1804.81,75000,0,0,0,368.04


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-08_15-40-31
  done: false
  episode_len_mean: 366.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 197
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.0675296836429173
          entropy_coeff: 0.009999999999999998
          kl: 0.00985552864715016
          policy_loss: 0.011157469492819575
          total_loss: -0.004349848400387499
          vf_explained_var: -1.0
          vf_loss: 0.0007329931049348993
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_restore: 76
  node_ip: 192.168.3.5
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,76,1832.75,76000,0,0,0,366.45


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-08_15-40-57
  done: false
  episode_len_mean: 364.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 200
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9615869283676148
          entropy_coeff: 0.009999999999999998
          kl: 0.010607724636208321
          policy_loss: -0.07386157578892177
          total_loss: -0.0880402713186211
          vf_explained_var: -0.39094078540802
          vf_loss: 0.0006636973852033002
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_restore: 77
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,77,1859.49,77000,0,0,0,364.25


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-08_15-41-25
  done: false
  episode_len_mean: 361.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 204
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.0268542329470316
          entropy_coeff: 0.009999999999999998
          kl: 0.012452718928832516
          policy_loss: -0.0543788036538495
          total_loss: -0.0682596823407544
          vf_explained_var: -0.5218468308448792
          vf_loss: 0.0007839406342504339
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since_restore: 78
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,78,1886.8,78000,0,0,0,361.75


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-08_15-41-53
  done: false
  episode_len_mean: 359.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 207
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.02669517993927
          entropy_coeff: 0.009999999999999998
          kl: 0.012865922394928238
          policy_loss: -0.06604932463831371
          total_loss: -0.08010640541712442
          vf_explained_var: -0.9640890955924988
          vf_loss: 0.000420206713089202
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_restore: 79
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,79,1915.39,79000,0,0,0,359.04




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-08_15-42-34
  done: false
  episode_len_mean: 359.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 210
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.1018105996979606
          entropy_coeff: 0.009999999999999998
          kl: 0.01181402277181932
          policy_loss: -0.042529111955728796
          total_loss: -0.057512490451335906
          vf_explained_var: -1.0
          vf_loss: 0.0007184158671104039
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_ip: 192.168.3.5
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,80,1955.83,80000,0,0,0,359.39


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-08_15-43-01
  done: false
  episode_len_mean: 357.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 213
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9882981671227349
          entropy_coeff: 0.009999999999999998
          kl: 0.013809579846798082
          policy_loss: -0.02044807822547025
          total_loss: -0.03360983516193099
          vf_explained_var: -0.9985558390617371
          vf_loss: 0.0005069129544103311
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_restore: 81
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,81,1983.44,81000,0,0,0,357.21


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-08_15-43-28
  done: false
  episode_len_mean: 355.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 216
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.033764640490214
          entropy_coeff: 0.009999999999999998
          kl: 0.008090480968220782
          policy_loss: -0.028593622893095017
          total_loss: -0.04483887387129168
          vf_explained_var: -0.8418025374412537
          vf_loss: 0.0004516796748955838
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_restore: 82
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,82,2009.76,82000,0,0,0,355.82


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-08_15-43-55
  done: false
  episode_len_mean: 354.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 219
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9859650943014358
          entropy_coeff: 0.009999999999999998
          kl: 0.013037975465780624
          policy_loss: -0.07730996968845526
          total_loss: -0.09073164011869166
          vf_explained_var: 0.0054681915789842606
          vf_loss: 0.0005708917723192523
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore: 83
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,83,2036.72,83000,0,0,0,354.35


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-08_15-44-20
  done: false
  episode_len_mean: 353.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 221
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.0201540059513516
          entropy_coeff: 0.009999999999999998
          kl: 0.01396494270624925
          policy_loss: -0.04355205897655752
          total_loss: -0.056481967597372
          vf_explained_var: -0.5501288771629333
          vf_loss: 0.0009874071072166165
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore: 84
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,84,2062.14,84000,0,0,0,353.66


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-08_15-44-46
  done: false
  episode_len_mean: 352.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 224
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9060606784290737
          entropy_coeff: 0.009999999999999998
          kl: 0.012026417949332293
          policy_loss: -0.04012857228517532
          total_loss: -0.052760984417465
          vf_explained_var: -0.15935508906841278
          vf_loss: 0.001016307576921665
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_restore: 85
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,85,2088.36,85000,0,0,0,352.03


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-08_15-45-11
  done: false
  episode_len_mean: 351.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 227
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.050778365135193
          entropy_coeff: 0.009999999999999998
          kl: 0.01308138441834128
          policy_loss: -0.06633173755059639
          total_loss: -0.07936335330208143
          vf_explained_var: -0.7519276142120361
          vf_loss: 0.0015895457944781002
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_restore: 86
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,86,2112.52,86000,0,0,0,351.19


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-08_15-45-36
  done: false
  episode_len_mean: 350.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 230
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9918261766433716
          entropy_coeff: 0.009999999999999998
          kl: 0.01479128576435027
          policy_loss: -0.11718869391414855
          total_loss: -0.1293157728181945
          vf_explained_var: -0.1595516800880432
          vf_loss: 0.001135103910928592
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_restore: 87
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,87,2137.64,87000,0,0,0,350.31


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-08_15-46-02
  done: false
  episode_len_mean: 349.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 233
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9652743524975247
          entropy_coeff: 0.009999999999999998
          kl: 0.020461162715758215
          policy_loss: -0.022813485769761933
          total_loss: -0.03223744552168581
          vf_explained_var: -0.4247286021709442
          vf_loss: 0.0010212618569817602
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,88,2163.95,88000,0,0,0,349.04


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-08_15-46-26
  done: false
  episode_len_mean: 348.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 236
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.017232420709398
          entropy_coeff: 0.009999999999999998
          kl: 0.01186949327067638
          policy_loss: -0.08363336380571126
          total_loss: -0.09432551998438107
          vf_explained_var: -0.6284500956535339
          vf_loss: 0.001468260105725171
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_restore: 89
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,89,2187.76,89000,0,0,0,348.74


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-08_15-46-51
  done: false
  episode_len_mean: 347.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 238
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.041628642876943
          entropy_coeff: 0.009999999999999998
          kl: 0.008345708387975314
          policy_loss: -0.06612817908947667
          total_loss: -0.07994146180442638
          vf_explained_var: -0.7527444362640381
          vf_loss: 0.0009696526181263228
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 90
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,90,2212.8,90000,0,0,0,347.38




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-08_15-47-35
  done: false
  episode_len_mean: 346.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 241
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0229256934589808
          entropy_coeff: 0.009999999999999998
          kl: 0.011525887214964373
          policy_loss: -0.08429101417875952
          total_loss: -0.09567944024586016
          vf_explained_var: -0.9713106155395508
          vf_loss: 0.0010608567843317157
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_restore: 91
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,91,2256.49,91000,0,0,0,346.14


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-08_15-47-58
  done: false
  episode_len_mean: 344.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 244
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.050669099224938
          entropy_coeff: 0.009999999999999998
          kl: 0.009913413536455825
          policy_loss: -0.07488674236875441
          total_loss: -0.08749773076011075
          vf_explained_var: -0.5838000178337097
          vf_loss: 0.001204145629890263
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_since_restore: 92
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,92,2279.4,92000,0,0,0,344.79


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-08_15-48-20
  done: false
  episode_len_mean: 345.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 246
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0781247284677296
          entropy_coeff: 0.009999999999999998
          kl: 0.010999357993609552
          policy_loss: -0.12834735607935321
          total_loss: -0.13997093683315648
          vf_explained_var: -0.8668122887611389
          vf_loss: 0.0017331011160018129
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_restore: 93
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,93,2301.7,93000,0,0,0,345.33


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-08_15-48-41
  done: false
  episode_len_mean: 344.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 249
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0872020522753396
          entropy_coeff: 0.009999999999999998
          kl: 0.00970698391650077
          policy_loss: -0.09223619256582526
          total_loss: -0.10553994327783585
          vf_explained_var: -0.6143307685852051
          vf_loss: 0.0010160543244435555
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 94
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,94,2323.33,94000,0,0,0,344.81


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-08_15-49-04
  done: false
  episode_len_mean: 346.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 251
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.094086084100935
          entropy_coeff: 0.009999999999999998
          kl: 0.009609946921691944
          policy_loss: -0.059412944648000926
          total_loss: -0.07224447590609391
          vf_explained_var: -0.8031301498413086
          vf_loss: 0.0016226121735396898
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_restore: 95
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,95,2345.72,95000,0,0,0,346.54


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-08_15-49-29
  done: false
  episode_len_mean: 346.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 254
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.020629886786143
          entropy_coeff: 0.009999999999999998
          kl: 0.009385025334161265
          policy_loss: -0.047706222906708715
          total_loss: -0.060200561065640715
          vf_explained_var: -0.9624892473220825
          vf_loss: 0.001377066948528712
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore: 96
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,96,2370.32,96000,0,0,0,346.77


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-08_15-49-53
  done: false
  episode_len_mean: 347.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 257
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9920831282933553
          entropy_coeff: 0.009999999999999998
          kl: 0.010149464302460645
          policy_loss: -0.025297072571184902
          total_loss: -0.03735468251009782
          vf_explained_var: -0.8952181339263916
          vf_loss: 0.0010123315412783995
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_restore: 97
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,97,2394.9,97000,0,0,0,347.32


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-08_15-50-18
  done: false
  episode_len_mean: 346.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 260
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.129740741517809
          entropy_coeff: 0.009999999999999998
          kl: 0.009037086154373913
          policy_loss: -0.0695960679401954
          total_loss: -0.08417844788895713
          vf_explained_var: -0.9462442994117737
          vf_loss: 0.0006149933453545802
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_restore: 98
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,98,2420.04,98000,0,0,0,346.47


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-08_15-50-44
  done: false
  episode_len_mean: 345.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 263
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9996025376849704
          entropy_coeff: 0.009999999999999998
          kl: 0.00680825070301904
          policy_loss: -0.011809198434154193
          total_loss: -0.026696950404180422
          vf_explained_var: -0.8610448837280273
          vf_loss: 0.0005127041894916652
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_restore: 99
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,99,2445.89,99000,0,0,0,345.74


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-08_15-51-08
  done: false
  episode_len_mean: 345.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 265
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.078396893872155
          entropy_coeff: 0.009999999999999998
          kl: 0.008050139956411724
          policy_loss: -0.09165032915771007
          total_loss: -0.10635116948849625
          vf_explained_var: -1.0
          vf_loss: 0.0006492841568413294
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,100,2470.2,100000,0,0,0,345.93


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-08_15-51-32
  done: false
  episode_len_mean: 344.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 268
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.073288622167375
          entropy_coeff: 0.009999999999999998
          kl: 0.009792654079922553
          policy_loss: -0.08113576401438978
          total_loss: -0.0946448977622721
          vf_explained_var: -0.659798264503479
          vf_loss: 0.0006137120883472057
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,101,2493.93,101000,0,0,0,344.96




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-08_15-52-15
  done: false
  episode_len_mean: 345.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 271
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.075579411453671
          entropy_coeff: 0.009999999999999998
          kl: 0.010977888335242388
          policy_loss: -0.03702631988045242
          total_loss: -0.04934470194081465
          vf_explained_var: -0.9972339868545532
          vf_loss: 0.0010273358597057975
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,102,2536.25,102000,0,0,0,345.77


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-08_15-52-39
  done: false
  episode_len_mean: 347.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 274
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0663206418355307
          entropy_coeff: 0.009999999999999998
          kl: 0.01276951198249263
          policy_loss: -0.051002918328675956
          total_loss: -0.06195124698181947
          vf_explained_var: -0.6825546622276306
          vf_loss: 0.0010954564857658827
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,103,2560.69,103000,0,0,0,347.06


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-08_15-53-04
  done: false
  episode_len_mean: 347.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 277
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0119917816585966
          entropy_coeff: 0.009999999999999998
          kl: 0.013171500203250246
          policy_loss: -0.10276408221365677
          total_loss: -0.11231639278638694
          vf_explained_var: -0.6054362058639526
          vf_loss: 0.0016768408654671575
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,104,2585.36,104000,0,0,0,347.97


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-08_15-53-30
  done: false
  episode_len_mean: 349.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 280
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.012426393561893
          entropy_coeff: 0.009999999999999998
          kl: 0.008313928166831167
          policy_loss: -0.06250982876453134
          total_loss: -0.07629839554429055
          vf_explained_var: -0.6415514945983887
          vf_loss: 0.0007237941841594875
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,105,2611.32,105000,0,0,0,349.25


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-08_15-53-56
  done: false
  episode_len_mean: 348.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 283
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.008609566423628
          entropy_coeff: 0.009999999999999998
          kl: 0.014346447413302361
          policy_loss: -0.04667208832171228
          total_loss: -0.056092965851227444
          vf_explained_var: -0.22296996414661407
          vf_loss: 0.0009813672267935342
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,106,2637.62,106000,0,0,0,348.51


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-08_15-54-19
  done: false
  episode_len_mean: 349.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 285
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.972996007071601
          entropy_coeff: 0.009999999999999998
          kl: 0.0101312307871711
          policy_loss: -0.06151305432948801
          total_loss: -0.07281744041376643
          vf_explained_var: -0.5964726209640503
          vf_loss: 0.0015869922140457978
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,107,2661.05,107000,0,0,0,349.22


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-08_15-54-44
  done: false
  episode_len_mean: 349.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 288
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0376879215240478
          entropy_coeff: 0.009999999999999998
          kl: 0.007498500616671164
          policy_loss: -0.07745809165967835
          total_loss: -0.0921314523037937
          vf_explained_var: -1.0
          vf_loss: 0.0006420291300552587
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,108,2685.96,108000,0,0,0,349.9


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-08_15-55-08
  done: false
  episode_len_mean: 351.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 291
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0178037193086413
          entropy_coeff: 0.009999999999999998
          kl: 0.01210818581436405
          policy_loss: -0.0841218434067236
          total_loss: -0.09416904925472207
          vf_explained_var: -0.9831727743148804
          vf_loss: 0.0019578077118947274
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,109,2709.92,109000,0,0,0,351.48


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-08_15-55-35
  done: false
  episode_len_mean: 352.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 294
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8939390738805135
          entropy_coeff: 0.009999999999999998
          kl: 0.008450745476634955
          policy_loss: -0.05876888549990124
          total_loss: -0.07121734027233388
          vf_explained_var: -0.8683445453643799
          vf_loss: 0.0007866824880087127
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,110,2736.37,110000,0,0,0,352.52


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-08_15-55-59
  done: false
  episode_len_mean: 353.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 296
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0010560340351526
          entropy_coeff: 0.009999999999999998
          kl: 0.009784181589822591
          policy_loss: -0.07893535981679128
          total_loss: -0.09078714970706238
          vf_explained_var: -0.6935277581214905
          vf_loss: 0.0015544474913945629
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,111,2760.87,111000,0,0,0,353.19


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-08_15-56-22
  done: false
  episode_len_mean: 354.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 299
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.058118516869015
          entropy_coeff: 0.009999999999999998
          kl: 0.011498614316449959
          policy_loss: -0.06587399099436071
          total_loss: -0.07722117317219575
          vf_explained_var: -0.999880313873291
          vf_loss: 0.001472439045836735
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,112,2783.66,112000,0,0,0,354.97




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-08_15-57-03
  done: false
  episode_len_mean: 355.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 302
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.049188959598541
          entropy_coeff: 0.009999999999999998
          kl: 0.013460702035278634
          policy_loss: -0.07279273335718446
          total_loss: -0.08247647987057766
          vf_explained_var: -0.5438706278800964
          vf_loss: 0.0017221707061657475
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,113,2824.94,113000,0,0,0,355.93


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-08_15-57-26
  done: false
  episode_len_mean: 356.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 304
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9892760939068264
          entropy_coeff: 0.009999999999999998
          kl: 0.008535076372776314
          policy_loss: -0.03085453738975856
          total_loss: -0.04399068702219261
          vf_explained_var: -0.6341675519943237
          vf_loss: 0.0009954362774604103
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,114,2847.59,114000,0,0,0,356.91


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-08_15-57-50
  done: false
  episode_len_mean: 358.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 307
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9168065494961208
          entropy_coeff: 0.009999999999999998
          kl: 0.011181660895349112
          policy_loss: -0.11578376044829687
          total_loss: -0.12648277257879575
          vf_explained_var: -0.9923496842384338
          vf_loss: 0.0009214311378956255
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,115,2871.69,115000,0,0,0,358.4


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-08_15-58-13
  done: false
  episode_len_mean: 359.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 310
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9351397236188252
          entropy_coeff: 0.009999999999999998
          kl: 0.010919393980921496
          policy_loss: -0.06476453385419316
          total_loss: -0.07564275846299198
          vf_explained_var: -0.8159981369972229
          vf_loss: 0.0011025828629499302
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,116,2894.83,116000,0,0,0,359.53


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-08_15-58-38
  done: false
  episode_len_mean: 360.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 313
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9613182968563503
          entropy_coeff: 0.009999999999999998
          kl: 0.011075575705391492
          policy_loss: -0.0016202765206495922
          total_loss: -0.012058978734744919
          vf_explained_var: -0.879878580570221
          vf_loss: 0.0016984680855077587
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,117,2919.4,117000,0,0,0,360.26


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-08_15-59-02
  done: false
  episode_len_mean: 360.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 315
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6830743551254272
          entropy_coeff: 0.009999999999999998
          kl: 0.008989625375978161
          policy_loss: -0.047179419671495755
          total_loss: -0.057039399517493115
          vf_explained_var: -0.9175442457199097
          vf_loss: 0.0009027670525635282
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,118,2943.72,118000,0,0,0,360.54


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-08_15-59-26
  done: false
  episode_len_mean: 361.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 318
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8000671638382806
          entropy_coeff: 0.009999999999999998
          kl: 0.010833274107888811
          policy_loss: -0.09799516436954339
          total_loss: -0.10767935013605488
          vf_explained_var: -1.0
          vf_loss: 0.0010040273243147465
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,119,2967.57,119000,0,0,0,361.59


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-08_15-59-52
  done: false
  episode_len_mean: 361.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 321
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6981039841969807
          entropy_coeff: 0.009999999999999998
          kl: 0.010117847118348363
          policy_loss: -0.05913523469741146
          total_loss: -0.06827213043967882
          vf_explained_var: -0.2223641574382782
          vf_loss: 0.0010145969433425408
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,120,2993.07,120000,0,0,0,361.06


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-08_16-00-17
  done: false
  episode_len_mean: 362.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 324
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8635096033414205
          entropy_coeff: 0.009999999999999998
          kl: 0.012784977804102813
          policy_loss: -0.014217534102499485
          total_loss: -0.023072230960759853
          vf_explained_var: -0.999064564704895
          vf_loss: 0.001150537516999369
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,121,3017.98,121000,0,0,0,362.48


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-08_16-00-42
  done: false
  episode_len_mean: 360.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 327
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8487186551094055
          entropy_coeff: 0.009999999999999998
          kl: 0.010831478209350365
          policy_loss: -0.04441004635559188
          total_loss: -0.05502970880932278
          vf_explained_var: -0.9581928849220276
          vf_loss: 0.000556275351846125
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,122,3043.46,122000,0,0,0,360.77




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-08_16-01-24
  done: false
  episode_len_mean: 360.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 330
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8757915205425686
          entropy_coeff: 0.009999999999999998
          kl: 0.010500778927692838
          policy_loss: -0.06633069614569347
          total_loss: -0.07725923409064611
          vf_explained_var: -0.4676779806613922
          vf_loss: 0.0007413488651056671
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,123,3085.05,123000,0,0,0,360.19


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-08_16-01-48
  done: false
  episode_len_mean: 360.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 333
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.774088900619083
          entropy_coeff: 0.009999999999999998
          kl: 0.012740356955934907
          policy_loss: -0.04226596237470706
          total_loss: -0.050638385075661874
          vf_explained_var: -0.834460437297821
          vf_loss: 0.0007687258886613159
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,124,3108.92,124000,0,0,0,360.72


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-08_16-02-10
  done: false
  episode_len_mean: 361.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 335
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9699006610446506
          entropy_coeff: 0.009999999999999998
          kl: 0.011327438008526454
          policy_loss: -0.06469607231103712
          total_loss: -0.07517063099270066
          vf_explained_var: -0.5465576648712158
          vf_loss: 0.0015784270556954045
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,125,3131,125000,0,0,0,361.76


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-08_16-02-35
  done: false
  episode_len_mean: 360.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 338
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9214632127020095
          entropy_coeff: 0.009999999999999998
          kl: 0.00916837326338883
          policy_loss: -0.06431519018693102
          total_loss: -0.07680365633633401
          vf_explained_var: -0.6095467209815979
          vf_loss: 0.0005375132912174902
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,126,3156.08,126000,0,0,0,360.91


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-08_16-02-57
  done: false
  episode_len_mean: 361.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 341
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9666611207856073
          entropy_coeff: 0.009999999999999998
          kl: 0.010766756651733782
          policy_loss: -0.10381818239887555
          total_loss: -0.11524161468777391
          vf_explained_var: -0.7461637854576111
          vf_loss: 0.0009756154037960287
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,127,3178.32,127000,0,0,0,361.62


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-08_16-03-21
  done: false
  episode_len_mean: 362.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 343
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9473359333144293
          entropy_coeff: 0.009999999999999998
          kl: 0.012057715370748134
          policy_loss: -0.04358921224872271
          total_loss: -0.05367896196742852
          vf_explained_var: -0.906208872795105
          vf_loss: 0.0012446509544841117
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,128,3201.74,128000,0,0,0,362


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-08_16-03-44
  done: false
  episode_len_mean: 361.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 346
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8147942145665488
          entropy_coeff: 0.009999999999999998
          kl: 0.012781206781403768
          policy_loss: -0.10424014967348841
          total_loss: -0.1126998061935107
          vf_explained_var: -0.9418931603431702
          vf_loss: 0.001060970801497913
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,129,3224.79,129000,0,0,0,361.47


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-08_16-04-08
  done: false
  episode_len_mean: 359.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 349
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9814336127705043
          entropy_coeff: 0.009999999999999998
          kl: 0.007711762137010089
          policy_loss: -0.024486540464891328
          total_loss: -0.03834887974792057
          vf_explained_var: -0.6833171248435974
          vf_loss: 0.000746556765322263
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,130,3248.87,130000,0,0,0,359.89


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-08_16-04-32
  done: false
  episode_len_mean: 357.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 352
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7244645516077677
          entropy_coeff: 0.009999999999999998
          kl: 0.012212374456340866
          policy_loss: -0.027500499246848956
          total_loss: -0.03527086331612534
          vf_explained_var: -0.49338990449905396
          vf_loss: 0.0012309277171476019
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,131,3272.75,131000,0,0,0,357.87


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-08_16-04-54
  done: false
  episode_len_mean: 358.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 354
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9026631368531122
          entropy_coeff: 0.009999999999999998
          kl: 0.007535913636654495
          policy_loss: -0.05992359363784393
          total_loss: -0.07303492232329316
          vf_explained_var: -0.5456580519676208
          vf_loss: 0.00082856077933684
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,132,3294.89,132000,0,0,0,358.19


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-08_16-05-16
  done: false
  episode_len_mean: 359.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 357
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.831049789322747
          entropy_coeff: 0.009999999999999998
          kl: 0.010792789668387944
          policy_loss: 0.0059099497894446055
          total_loss: -0.0043239878283606635
          vf_explained_var: -0.48175960779190063
          vf_loss: 0.0007914247340522707
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,133,3317,133000,0,0,0,359.17




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-08_16-05-54
  done: false
  episode_len_mean: 360.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 360
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7706930094295077
          entropy_coeff: 0.009999999999999998
          kl: 0.010938650046164927
          policy_loss: -0.035592003208067685
          total_loss: -0.04454112557901276
          vf_explained_var: -0.1507227122783661
          vf_loss: 0.0013742175690519313
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,134,3355.25,134000,0,0,0,360.33


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-08_16-06-17
  done: false
  episode_len_mean: 361.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 362
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9597249137030708
          entropy_coeff: 0.009999999999999998
          kl: 0.011700025086939172
          policy_loss: -0.11245742729968494
          total_loss: -0.12298682940502961
          vf_explained_var: -0.013885941356420517
          vf_loss: 0.00117032554699108
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,135,3377.99,135000,0,0,0,361.57


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-08_16-06-37
  done: false
  episode_len_mean: 363.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 364
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.902323681778378
          entropy_coeff: 0.009999999999999998
          kl: 0.010008494824757488
          policy_loss: -0.09583292487594816
          total_loss: -0.10643246231807603
          vf_explained_var: -0.879984438419342
          vf_loss: 0.00166796484328289
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,136,3398.36,136000,0,0,0,363.05


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-08_16-06-58
  done: false
  episode_len_mean: 365.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 367
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.066269866625468
          entropy_coeff: 0.009999999999999998
          kl: 0.011201860967018758
          policy_loss: -0.025597623756362334
          total_loss: -0.03747038128268387
          vf_explained_var: -0.6434616446495056
          vf_loss: 0.001228685598471202
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,137,3419.15,137000,0,0,0,365.56


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-08_16-07-20
  done: false
  episode_len_mean: 365.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 369
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9777209454112583
          entropy_coeff: 0.009999999999999998
          kl: 0.009833630594754282
          policy_loss: -0.09427741907743944
          total_loss: -0.106684484001663
          vf_explained_var: -1.0
          vf_loss: 0.0007324426738907479
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,138,3441.09,138000,0,0,0,365.9


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-08_16-07-44
  done: false
  episode_len_mean: 366.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 372
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8075074619717069
          entropy_coeff: 0.009999999999999998
          kl: 0.010239368061413344
          policy_loss: -0.06519385452071826
          total_loss: -0.07527185045182705
          vf_explained_var: -0.47556135058403015
          vf_loss: 0.0010855048519766166
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,139,3464.52,139000,0,0,0,366.14


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-08_16-08-07
  done: false
  episode_len_mean: 366.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 375
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8695955740080938
          entropy_coeff: 0.009999999999999998
          kl: 0.013816794985918198
          policy_loss: -0.04665684344040023
          total_loss: -0.054384432112177215
          vf_explained_var: -0.18758919835090637
          vf_loss: 0.0016420287746263462
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,140,3487.72,140000,0,0,0,366.37


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-08_16-08-30
  done: false
  episode_len_mean: 366.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 378
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8701025168100993
          entropy_coeff: 0.009999999999999998
          kl: 0.011967055117687605
          policy_loss: -0.09688983923859067
          total_loss: -0.10588724915352132
          vf_explained_var: -0.35182538628578186
          vf_loss: 0.0016258550348639902
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,141,3510.97,141000,0,0,0,366.95


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-08_16-08-55
  done: false
  episode_len_mean: 366.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 380
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8604018264346653
          entropy_coeff: 0.009999999999999998
          kl: 0.0018391375467830232
          policy_loss: -0.2044549802939097
          total_loss: -0.22108006825049717
          vf_explained_var: -0.4950316846370697
          vf_loss: 0.0007375112737968771
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,142,3535.84,142000,0,0,0,366


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-08_16-09-22
  done: false
  episode_len_mean: 365.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 4
  episodes_total: 384
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8115908092922635
          entropy_coeff: 0.009999999999999998
          kl: 0.016539904220593905
          policy_loss: -0.09874152346617646
          total_loss: -0.11040772108568085
          vf_explained_var: -0.7954832911491394
          vf_loss: 0.0008674934419104829
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,143,3562.93,143000,-0.08,0,-8,365.95


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-08_16-09-44
  done: false
  episode_len_mean: 365.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 386
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0070921752187942
          entropy_coeff: 0.009999999999999998
          kl: 0.012666876704923865
          policy_loss: -0.04110445251895322
          total_loss: -0.0561793031791846
          vf_explained_var: -0.7453349828720093
          vf_loss: 0.0007209990040994145
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 144
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,144,3585.01,144000,-0.08,0,-8,365.43




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-08_16-10-27
  done: false
  episode_len_mean: 363.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 4
  episodes_total: 390
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8891740825441148
          entropy_coeff: 0.009999999999999998
          kl: 0.01162280161270017
          policy_loss: -0.08345173050959905
          total_loss: -0.09780054808490807
          vf_explained_var: -0.5611055493354797
          vf_loss: 0.000620228833415442
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_since_restore: 145
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,145,3628.24,145000,-0.08,0,-8,363.36


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-08_16-10-51
  done: false
  episode_len_mean: 364.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 392
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8684935225380792
          entropy_coeff: 0.009999999999999998
          kl: 0.016045459941130207
          policy_loss: -0.06502268698273433
          total_loss: -0.07656607294662131
          vf_explained_var: -0.5392154455184937
          vf_loss: 0.0017262049892451613
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_since_restore: 146
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,146,3651.42,146000,-0.08,0,-8,364.78


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-08_16-11-16
  done: false
  episode_len_mean: 365.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 395
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7339710778660244
          entropy_coeff: 0.009999999999999998
          kl: 0.020919437733518877
          policy_loss: -0.04381223618984222
          total_loss: -0.05220995143883758
          vf_explained_var: -0.3191450238227844
          vf_loss: 0.0018816863742863966
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_since_restore: 147
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,147,3676.36,147000,-0.08,0,-8,365.18


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-08_16-11-38
  done: false
  episode_len_mean: 366.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 397
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7905659291479323
          entropy_coeff: 0.009999999999999998
          kl: 0.009189111143494896
          policy_loss: 0.0057351806097560454
          total_loss: 0.01137965288427141
          vf_explained_var: -0.17967963218688965
          vf_loss: 0.018898147587767904
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 148
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,148,3699.27,148000,-0.12,0,-8,366.47


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-08_16-12-03
  done: false
  episode_len_mean: 367.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 400
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.907503158516354
          entropy_coeff: 0.009999999999999998
          kl: 0.014438992169845804
          policy_loss: -0.15468100735710727
          total_loss: -0.16390277637789646
          vf_explained_var: -0.05775044858455658
          vf_loss: 0.002543522083821396
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_since_restore: 149
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,149,3723.46,149000,-0.12,0,-8,367.53


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-08_16-12-31
  done: false
  episode_len_mean: 365.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 403
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.74394787285063
          entropy_coeff: 0.009999999999999998
          kl: 0.0093380619131728
          policy_loss: -0.05601600477885869
          total_loss: -0.06806549094617367
          vf_explained_var: -0.28943273425102234
          vf_loss: 0.0006625964474450383
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 150
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,150,3752.03,150000,-0.12,0,-8,365.49


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-08_16-12-56
  done: false
  episode_len_mean: 365.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 405
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9255290389060975
          entropy_coeff: 0.009999999999999998
          kl: 0.013358317233935659
          policy_loss: -0.05954233000261916
          total_loss: -0.07083589531895187
          vf_explained_var: -0.5105516314506531
          vf_loss: 0.0011990749134889078
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_since_restore: 151
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,151,3776.83,151000,-0.12,0,-8,365.19


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-08_16-13-19
  done: false
  episode_len_mean: 366.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 408
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.017670879099104
          entropy_coeff: 0.009999999999999998
          kl: 0.014030465654633773
          policy_loss: -0.042708634336789446
          total_loss: -0.05482237959901492
          vf_explained_var: -0.7114428877830505
          vf_loss: 0.0009600389069722345
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 152
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,152,3800.13,152000,-0.12,0,-8,366.13


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-08_16-13-45
  done: false
  episode_len_mean: 366.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 411
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8816739877065023
          entropy_coeff: 0.009999999999999998
          kl: 0.01287306546072722
          policy_loss: -0.11628696587350633
          total_loss: -0.1276578575372696
          vf_explained_var: -0.6382432579994202
          vf_loss: 0.0009288584153788785
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_since_restore: 153
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,153,3825.59,153000,-0.12,0,-8,366.84


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-08_16-14-09
  done: false
  episode_len_mean: 367.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 413
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8700995220078362
          entropy_coeff: 0.009999999999999998
          kl: 0.013347397021383782
          policy_loss: -0.07466699245075385
          total_loss: -0.08545459157062901
          vf_explained_var: -0.8352000713348389
          vf_loss: 0.0011562760427801144
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_since_restore: 154
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,154,3849.88,154000,-0.12,0,-8,367.8


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-08_16-14-33
  done: false
  episode_len_mean: 368.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 416
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7693500558535258
          entropy_coeff: 0.009999999999999998
          kl: 0.012544218292133083
          policy_loss: -0.13035258534881805
          total_loss: -0.13992878111700216
          vf_explained_var: -0.5697557330131531
          vf_loss: 0.0017667983865572346
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_since_restore: 155
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,155,3873.4,155000,-0.12,0,-8,368.31


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-08_16-14-57
  done: false
  episode_len_mean: 369.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 418
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6889174766010708
          entropy_coeff: 0.009999999999999998
          kl: 0.018175510453822173
          policy_loss: -0.020147471378246944
          total_loss: -0.02636060035891003
          vf_explained_var: -0.04526154696941376
          vf_loss: 0.0014746951741269893
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_since_restore: 156
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,156,3897.53,156000,-0.12,0,-8,369.28




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-08_16-15-37
  done: false
  episode_len_mean: 370.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 421
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7976036455896165
          entropy_coeff: 0.009999999999999998
          kl: 0.012571985017857478
          policy_loss: -0.14089007597002717
          total_loss: -0.15159029704001215
          vf_explained_var: -0.33863160014152527
          vf_loss: 0.00091124999582664
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations_since_restore: 157
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,157,3937.6,157000,-0.12,0,-8,370.03


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-08_16-15-58
  done: false
  episode_len_mean: 369.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 423
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7687575976053873
          entropy_coeff: 0.009999999999999998
          kl: 0.013708436618571879
          policy_loss: -0.1018849387144049
          total_loss: -0.11173018821411662
          vf_explained_var: -0.5082760453224182
          vf_loss: 0.0009024321893876833
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations_since_restore: 158
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,158,3958.61,158000,-0.12,0,-8,369.28


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-08_16-16-21
  done: false
  episode_len_mean: 371.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 426
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8829814195632935
          entropy_coeff: 0.009999999999999998
          kl: 0.012141412320974767
          policy_loss: -0.07506667807077368
          total_loss: -0.08713593504702051
          vf_explained_var: -0.3569673001766205
          vf_loss: 0.0006139648583484813
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_since_restore: 159
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,159,3981.09,159000,-0.12,0,-8,371.89


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-08_16-16-43
  done: false
  episode_len_mean: 373.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 429
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9913883288701375
          entropy_coeff: 0.009999999999999998
          kl: 0.010049314170669217
          policy_loss: -0.134492452070117
          total_loss: -0.1487791924013032
          vf_explained_var: -0.9994450807571411
          vf_loss: 0.0005396781435896022
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 160
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,160,4003.06,160000,-0.12,0,-8,373.76


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-08_16-17-04
  done: false
  episode_len_mean: 374.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 431
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.93893950647778
          entropy_coeff: 0.009999999999999998
          kl: 0.016079960172925272
          policy_loss: -0.09419868199361695
          total_loss: -0.1044052552845743
          vf_explained_var: -0.49022942781448364
          vf_loss: 0.001042341695736266
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_since_restore: 161
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,161,4024.57,161000,-0.12,0,-8,374.78


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-08_16-17-28
  done: false
  episode_len_mean: 373.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 434
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0564663264486525
          entropy_coeff: 0.009999999999999998
          kl: 0.011980418011633394
          policy_loss: -0.05987703483551741
          total_loss: -0.07374081556788749
          vf_explained_var: -0.9102351665496826
          vf_loss: 0.0006357941863825544
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_since_restore: 162
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,162,4048.46,162000,-0.12,0,-8,373.9


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-08_16-17-49
  done: false
  episode_len_mean: 374.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 437
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.041716957092285
          entropy_coeff: 0.009999999999999998
          kl: 0.013595140425460938
          policy_loss: -0.11370076835155488
          total_loss: -0.126710265999039
          vf_explained_var: -0.5231282711029053
          vf_loss: 0.0005251329699401847
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_since_restore: 163
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,163,4069.29,163000,-0.12,0,-8,374.77


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-08_16-18-10
  done: false
  episode_len_mean: 375.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 439
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9326165503925747
          entropy_coeff: 0.009999999999999998
          kl: 0.012319282618303045
          policy_loss: -0.0945716327884131
          total_loss: -0.10657772620519003
          vf_explained_var: -0.7224962711334229
          vf_loss: 0.0010834328801138326
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_since_restore: 164
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,164,4090.68,164000,-0.12,0,-8,375.35


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-08_16-18-33
  done: false
  episode_len_mean: 375.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 442
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.918850909339057
          entropy_coeff: 0.009999999999999998
          kl: 0.012318474264312387
          policy_loss: -0.03960621588759952
          total_loss: -0.05175724311007394
          vf_explained_var: -0.456969678401947
          vf_loss: 0.0008012541728223571
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterations_since_restore: 165
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,165,4113.12,165000,-0.12,0,-8,375.55


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-08_16-18-53
  done: false
  episode_len_mean: 375.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 444
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0257582743962605
          entropy_coeff: 0.009999999999999998
          kl: 0.009065531101438495
          policy_loss: -0.049743371374077264
          total_loss: -0.06472076210710738
          vf_explained_var: -0.04097602516412735
          vf_loss: 0.00069076677351
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 166
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,166,4133.16,166000,-0.12,0,-8,375.6


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-08_16-19-16
  done: false
  episode_len_mean: 376.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 447
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8880534423722162
          entropy_coeff: 0.009999999999999998
          kl: 0.011004306137442457
          policy_loss: 0.005990348176823722
          total_loss: -0.00670183797677358
          vf_explained_var: -0.5427919626235962
          vf_loss: 0.0006174160468314464
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations_since_restore: 167
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,167,4156.25,167000,-0.12,0,-8,376




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-08_16-19-56
  done: false
  episode_len_mean: 375.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 450
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7258743127187093
          entropy_coeff: 0.009999999999999998
          kl: 0.015495417151805352
          policy_loss: -0.07915541173683273
          total_loss: -0.08733311345179876
          vf_explained_var: -0.56572026014328
          vf_loss: 0.0012364868773147464
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 168
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,168,4196.56,168000,-0.12,0,-8,375.75


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-08_16-20-19
  done: false
  episode_len_mean: 376.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 453
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.869236299726698
          entropy_coeff: 0.009999999999999998
          kl: 0.013846166457707396
          policy_loss: 3.914886878596412e-05
          total_loss: -0.010974485737582048
          vf_explained_var: -0.6485059261322021
          vf_loss: 0.0006691079547939201
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations_since_restore: 169
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,169,4219.76,169000,-0.12,0,-8,376.31


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-08_16-20-42
  done: false
  episode_len_mean: 376.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 455
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8184724595811632
          entropy_coeff: 0.009999999999999998
          kl: 0.011835459756657703
          policy_loss: -0.03143194642745786
          total_loss: -0.04289322965261009
          vf_explained_var: -0.9890323877334595
          vf_loss: 0.0007317392866955035
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 170
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,170,4242.15,170000,-0.12,0,-8,376.11


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-08_16-21-04
  done: false
  episode_len_mean: 374.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 458
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5666803134812248
          entropy_coeff: 0.009999999999999998
          kl: 0.013452947386988651
          policy_loss: -0.07383636732896169
          total_loss: -0.08196709948695369
          vf_explained_var: -0.8850719928741455
          vf_loss: 0.0007255120219067774
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_since_restore: 171
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,171,4264.61,171000,-0.12,0,-8,374.35


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-08_16-21-29
  done: false
  episode_len_mean: 374.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 461
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6096415797869363
          entropy_coeff: 0.009999999999999998
          kl: 0.013585219769370759
          policy_loss: -0.04964802919162644
          total_loss: -0.057787385003434284
          vf_explained_var: -0.7732987999916077
          vf_loss: 0.0010795417442245202
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 172
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,172,4288.84,172000,-0.12,0,-8,374.36


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-08_16-21-50
  done: false
  episode_len_mean: 373.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 464
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8004816823535494
          entropy_coeff: 0.009999999999999998
          kl: 0.015773970799080227
          policy_loss: -0.03127457797527313
          total_loss: -0.04073488194909361
          vf_explained_var: -0.24859890341758728
          vf_loss: 0.0005589398658937878
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterations_since_restore: 173
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,173,4309.85,173000,-0.12,0,-8,373.5


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-08_16-22-11
  done: false
  episode_len_mean: 372.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 466
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7835818727811177
          entropy_coeff: 0.009999999999999998
          kl: 0.01961417071584132
          policy_loss: -0.03484274869163831
          total_loss: -0.04209214299917221
          vf_explained_var: -0.5073517560958862
          vf_loss: 0.0006567495810385379
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_since_restore: 174
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,174,4331.13,174000,-0.12,0,-8,372.72


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-08_16-22-33
  done: false
  episode_len_mean: 372.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 469
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8240267660882739
          entropy_coeff: 0.009999999999999998
          kl: 0.01125107347054648
          policy_loss: -0.025908034967465535
          total_loss: -0.03783278378347556
          vf_explained_var: -0.5730635523796082
          vf_loss: 0.0006196614325745031
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterations_since_restore: 175
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,175,4353.01,175000,-0.12,0,-8,372.28


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-08_16-22-53
  done: false
  episode_len_mean: 372.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 471
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9159700751304627
          entropy_coeff: 0.009999999999999998
          kl: 0.012899747934790115
          policy_loss: -0.03275435254391697
          total_loss: -0.04471689789659447
          vf_explained_var: -0.9232434034347534
          vf_loss: 0.0006666564586339518
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 176
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,176,4373.19,176000,-0.12,0,-8,372.39


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-08_16-23-16
  done: false
  episode_len_mean: 373.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 474
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6894914733039008
          entropy_coeff: 0.009999999999999998
          kl: 0.011215986265889752
          policy_loss: -0.08341115568247107
          total_loss: -0.09406996493538221
          vf_explained_var: -0.8895565271377563
          vf_loss: 0.0005580144905252382
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iterations_since_restore: 177
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,177,4396.39,177000,-0.12,0,-8,373.37


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-08_16-23-38
  done: false
  episode_len_mean: 372.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 3
  episodes_total: 477
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8377912362416586
          entropy_coeff: 0.009999999999999998
          kl: 0.012650593009092938
          policy_loss: -0.060838012852602534
          total_loss: -0.07234264926777946
          vf_explained_var: -0.6160953044891357
          vf_loss: 0.00046891497396346594
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations_since_restore: 178
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,178,4418.44,178000,-0.12,0,-8,372.97


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-08_16-23-59
  done: false
  episode_len_mean: 374.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 479
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7605139560169645
          entropy_coeff: 0.009999999999999998
          kl: 0.011803046712310965
          policy_loss: -0.048359200689527725
          total_loss: -0.059208303462300035
          vf_explained_var: -0.48473289608955383
          vf_loss: 0.0007807437854353338
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iterations_since_restore: 179
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,179,4439.03,179000,-0.12,0,-8,374.55




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-08_16-24-35
  done: false
  episode_len_mean: 376.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 481
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8402733908759223
          entropy_coeff: 0.009999999999999998
          kl: 0.012407573137483219
          policy_loss: -0.06241836854153209
          total_loss: -0.0735968782669968
          vf_explained_var: -0.9442582726478577
          vf_loss: 0.0009428900065055738
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 180
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,180,4475.58,180000,-0.04,0,-4,376.09


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-08_16-24-55
  done: false
  episode_len_mean: 379.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 483
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8745234171549479
          entropy_coeff: 0.009999999999999998
          kl: 0.013519526465237242
          policy_loss: 0.019884487158722347
          total_loss: 0.00851240646508005
          vf_explained_var: -0.622148871421814
          vf_loss: 0.0005288925054224415
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 181
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,181,4495.52,181000,-0.04,0,-4,379.09


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-08_16-25-18
  done: false
  episode_len_mean: 381.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 486
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8125646763377719
          entropy_coeff: 0.009999999999999998
          kl: 0.011111836676392489
          policy_loss: -0.07088429414563709
          total_loss: -0.08285742584202024
          vf_explained_var: -0.7843126654624939
          vf_loss: 0.0005271505816684415
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_since_restore: 182
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,182,4518.36,182000,-0.04,0,-4,381.13


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-08_16-25-41
  done: false
  episode_len_mean: 382.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 488
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.914996980296241
          entropy_coeff: 0.009999999999999998
          kl: 0.012657902862891035
          policy_loss: -0.05756855391793781
          total_loss: -0.06968001599113147
          vf_explained_var: -0.8592236042022705
          vf_loss: 0.0006304428560219498
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iterations_since_restore: 183
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,183,4541.54,183000,-0.04,0,-4,382.92


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-08_16-26-03
  done: false
  episode_len_mean: 385.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 491
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9159340182940165
          entropy_coeff: 0.009999999999999998
          kl: 0.019493736052246517
          policy_loss: -0.07411245240105523
          total_loss: -0.08203077297657728
          vf_explained_var: -0.5378494262695312
          vf_loss: 0.0013723151616027785
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 184
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,184,4563.09,184000,-0.04,0,-4,385.28


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-08_16-26-28
  done: false
  episode_len_mean: 384.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 493
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.813580032189687
          entropy_coeff: 0.009999999999999998
          kl: 0.010728915023941996
          policy_loss: -0.09485724303457473
          total_loss: -0.10683496449556616
          vf_explained_var: -0.9753943681716919
          vf_loss: 0.0007265658295687495
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iterations_since_restore: 185
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,185,4588.08,185000,-0.04,0,-4,384.79


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-08_16-26-52
  done: false
  episode_len_mean: 385.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 496
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8731303956773546
          entropy_coeff: 0.009999999999999998
          kl: 0.013777822921426866
          policy_loss: -0.04893901691668563
          total_loss: -0.05970249507162306
          vf_explained_var: -0.41115808486938477
          vf_loss: 0.000992801008073406
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_since_restore: 186
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,186,4611.69,186000,-0.04,0,-4,385.69


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-08_16-27-17
  done: false
  episode_len_mean: 383.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 499
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.796680368317498
          entropy_coeff: 0.009999999999999998
          kl: 0.009253276634497808
          policy_loss: 0.002151023472348849
          total_loss: -0.010642760826481714
          vf_explained_var: -0.06492941826581955
          vf_loss: 0.0004885468389450883
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_since_restore: 187
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,187,4636.87,187000,0,0,0,383.56


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-08_16-27-38
  done: false
  episode_len_mean: 385.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 501
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8553280247582329
          entropy_coeff: 0.009999999999999998
          kl: 0.0028535067157017084
          policy_loss: -0.11337836252318488
          total_loss: -0.12947048081292045
          vf_explained_var: -1.0
          vf_loss: 0.0010165740412452983
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterations_since_restore: 188
  node_ip: 192.168.3.5
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,188,4658.07,188000,0,0,0,385.35


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-08_16-27-59
  done: false
  episode_len_mean: 388.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 503
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7125278181499906
          entropy_coeff: 0.009999999999999998
          kl: 0.014771187963253635
          policy_loss: 0.000843747788005405
          total_loss: -0.0006524051229159037
          vf_explained_var: -0.15604214370250702
          vf_loss: 0.011890167816373934
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterations_since_restore: 189
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,189,4678.82,189000,-0.04,0,-4,388


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-08_16-28-22
  done: false
  episode_len_mean: 389.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 506
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9165257427427504
          entropy_coeff: 0.009999999999999998
          kl: 0.017436686618649194
          policy_loss: -0.08844248250954681
          total_loss: -0.10167718190285895
          vf_explained_var: 0.10656698048114777
          vf_loss: 0.0015168975754123596
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 190
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,190,4702.08,190000,-0.04,0,-4,389.08


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-08_16-28-45
  done: false
  episode_len_mean: 388.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 508
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.793864693906572
          entropy_coeff: 0.009999999999999998
          kl: 0.01286621434468525
          policy_loss: -0.10469502988788816
          total_loss: -0.11839988931185669
          vf_explained_var: -0.16438913345336914
          vf_loss: 0.0009770268477344265
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  iterations_since_restore: 191
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,191,4725.38,191000,-0.04,0,-4,388.68




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-08_16-29-19
  done: false
  episode_len_mean: 390.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 510
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 2.0219843983650208
          entropy_coeff: 0.009999999999999998
          kl: 0.01999281092858642
          policy_loss: -0.12090170428984695
          total_loss: -0.1352092909730143
          vf_explained_var: -0.4499540627002716
          vf_loss: 0.000851576323232924
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 192
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,192,4759.21,192000,-0.04,0,-4,390.08


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-08_16-29-43
  done: false
  episode_len_mean: 390.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 513
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9886003600226507
          entropy_coeff: 0.009999999999999998
          kl: 0.014927328989922847
          policy_loss: -0.06485431635131439
          total_loss: -0.08021633304241631
          vf_explained_var: -0.995894193649292
          vf_loss: 0.0007455066981492565
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  iterations_since_restore: 193
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,193,4783.24,193000,-0.04,0,-4,390.94


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-08_16-30-05
  done: false
  episode_len_mean: 390.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 515
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5962148984273274
          entropy_coeff: 0.009999999999999998
          kl: 0.012446898532778682
          policy_loss: -0.07448152158823278
          total_loss: -0.08656591702666548
          vf_explained_var: -0.6995903253555298
          vf_loss: 0.0007271298336692983
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterations_since_restore: 194
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,194,4804.49,194000,-0.04,0,-4,390.28


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-08_16-30-24
  done: false
  episode_len_mean: 391.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 518
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8694313009579977
          entropy_coeff: 0.009999999999999998
          kl: 0.018883358435678708
          policy_loss: -0.05563349144326316
          total_loss: -0.06839246151761877
          vf_explained_var: -0.6191084384918213
          vf_loss: 0.0011554935216231065
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  iterations_since_restore: 195
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,195,4824.02,195000,-0.04,0,-4,391.91


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-08_16-30-46
  done: false
  episode_len_mean: 392.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 520
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7751324004597133
          entropy_coeff: 0.009999999999999998
          kl: 0.017291272408898686
          policy_loss: -0.08416059596670998
          total_loss: -0.09674132636023892
          vf_explained_var: -0.8577232956886292
          vf_loss: 0.0007937422649572707
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 196
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,196,4845.39,196000,-0.04,0,-4,392.23


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-08_16-31-06
  done: false
  episode_len_mean: 393.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 523
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7568364593717787
          entropy_coeff: 0.009999999999999998
          kl: 0.012472003389562402
          policy_loss: 0.006733475459946526
          total_loss: 0.009796218077341715
          vf_explained_var: -0.45449286699295044
          vf_loss: 0.017474128399044275
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  iterations_since_restore: 197
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,197,4865.37,197000,-0.07,0,-4,393.85


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-08_16-31-25
  done: false
  episode_len_mean: 394.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 525
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6528635912471348
          entropy_coeff: 0.009999999999999998
          kl: 0.015973022109393695
          policy_loss: -0.049574008014880946
          total_loss: -0.05853008265193138
          vf_explained_var: -0.3964473307132721
          vf_loss: 0.0035293898946191704
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iterations_since_restore: 198
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,198,4884.98,198000,-0.07,0,-4,394.29


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-08_16-31-45
  done: false
  episode_len_mean: 395.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 527
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9452433188756306
          entropy_coeff: 0.009999999999999998
          kl: 0.01836845116839435
          policy_loss: -0.03293694696492619
          total_loss: -0.04669422482450803
          vf_explained_var: -0.47084978222846985
          vf_loss: 0.0010456397330724737
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  iterations_since_restore: 199
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,199,4904.7,199000,-0.07,0,-4,395.09


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-08_16-32-05
  done: false
  episode_len_mean: 396.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 530
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8840511269039577
          entropy_coeff: 0.009999999999999998
          kl: 0.01750361333885798
          policy_loss: -0.012483936382664575
          total_loss: 0.10886496189567778
          vf_explained_var: -0.3897337317466736
          vf_loss: 0.13575881136995221
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 200
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,200,4924.28,200000,-0.13,0,-6,396.15


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-08_16-32-23
  done: false
  episode_len_mean: 397.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 532
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7976519677374099
          entropy_coeff: 0.009999999999999998
          kl: 0.009023269195554937
          policy_loss: -0.23797663930389618
          total_loss: -0.23440036709523862
          vf_explained_var: -0.3551900088787079
          vf_loss: 0.019268778271766172
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  iterations_since_restore: 201
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,201,4942.84,201000,-0.13,0,-6,397.72


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-08_16-32-47
  done: false
  episode_len_mean: 398.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 535
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6511035137706334
          entropy_coeff: 0.009999999999999998
          kl: 0.016334514782069418
          policy_loss: -0.08431878536939622
          total_loss: -0.08386729641093148
          vf_explained_var: 0.0219492856413126
          vf_loss: 0.012827847727263968
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iterations_since_restore: 202
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,202,4966.48,202000,-0.19,0,-6,398.71


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-08_16-33-07
  done: false
  episode_len_mean: 398.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 537
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8893209430906508
          entropy_coeff: 0.009999999999999998
          kl: 0.015920360325301273
          policy_loss: -0.1382987627138694
          total_loss: -0.14290555069843927
          vf_explained_var: -0.4388292133808136
          vf_loss: 0.010256576554901484
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iterations_since_restore: 203
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,203,4987.01,203000,-0.19,0,-6,398.86


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-08_16-33-25
  done: false
  episode_len_mean: 401.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 539
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9245957679218717
          entropy_coeff: 0.009999999999999998
          kl: 0.015281436856833391
          policy_loss: -0.0883073755643434
          total_loss: -0.0980241929491361
          vf_explained_var: -0.2684766948223114
          vf_loss: 0.005661026011997213
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iterations_since_restore: 204
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,204,5004.37,204000,-0.19,0,-6,401.04




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-08_16-34-01
  done: false
  episode_len_mean: 401.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 541
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7908491863144769
          entropy_coeff: 0.009999999999999998
          kl: 0.011493747314340337
          policy_loss: 0.03143513624866803
          total_loss: 0.018987955235772663
          vf_explained_var: -0.694196879863739
          vf_loss: 0.0025519562587659394
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iterations_since_restore: 205
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,205,5040.58,205000,-0.19,0,-6,401.65


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-08_16-34-24
  done: false
  episode_len_mean: 402.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -6.0
  episodes_this_iter: 3
  episodes_total: 544
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7747568607330322
          entropy_coeff: 0.009999999999999998
          kl: 0.01775701846857917
          policy_loss: -0.02722371473080582
          total_loss: -0.03813681486580107
          vf_explained_var: -0.41287147998809814
          vf_loss: 0.002339724969998416
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterations_since_restore: 206
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,206,5063.52,206000,-0.19,0,-6,402.4


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-08_16-34-43
  done: false
  episode_len_mean: 403.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 546
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.867535079850091
          entropy_coeff: 0.009999999999999998
          kl: 0.005996923168881402
          policy_loss: -0.2025565465291341
          total_loss: -0.21808939708603753
          vf_explained_var: -0.8204056620597839
          vf_loss: 0.0016245265458969193
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  iterations_since_restore: 207
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,207,5083.08,207000,-0.19,0,-6,403.4


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-08_16-35-05
  done: false
  episode_len_mean: 405.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.34
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 549
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7977701942125957
          entropy_coeff: 0.009999999999999998
          kl: 0.006264823519233195
          policy_loss: -0.2509004000160429
          total_loss: -0.26173103319274055
          vf_explained_var: -0.6482966542243958
          vf_loss: 0.005561284533986408
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 208
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,208,5104.16,208000,-0.34,0,-10,405.87


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-08_16-35-26
  done: false
  episode_len_mean: 406.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.42
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 551
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8114952445030212
          entropy_coeff: 0.009999999999999998
          kl: 0.01240582076146727
          policy_loss: -0.0888348022268878
          total_loss: 0.015283851656648847
          vf_explained_var: -0.4257977306842804
          vf_loss: 0.11909338425224027
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  iterations_since_restore: 209
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,209,5125.21,209000,-0.42,0,-10,406.85


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-08_16-35-45
  done: false
  episode_len_mean: 407.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.44
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 553
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7721235858069526
          entropy_coeff: 0.009999999999999998
          kl: 0.013898697390054417
          policy_loss: -0.19870787254638142
          total_loss: -0.16446483896838293
          vf_explained_var: -0.13102160394191742
          vf_loss: 0.048446161369793114
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore: 210
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,210,5144.94,210000,-0.44,0,-10,407.67


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-08_16-36-04
  done: false
  episode_len_mean: 409.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.54
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 555
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8072317242622375
          entropy_coeff: 0.009999999999999998
          kl: 0.017839342323170172
          policy_loss: 0.07724501296050018
          total_loss: 0.13529977647380698
          vf_explained_var: 0.05180232971906662
          vf_loss: 0.07161149241858059
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_since_restore: 211
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,211,5163.71,211000,-0.54,0,-10,409.18


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-08_16-36-27
  done: false
  episode_len_mean: 410.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.58
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 558
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6468134456210666
          entropy_coeff: 0.009999999999999998
          kl: 0.01708197206049111
          policy_loss: -0.04850892329381572
          total_loss: -0.017662012784017456
          vf_explained_var: -0.39359042048454285
          vf_loss: 0.04299116911004401
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  iterations_since_restore: 212
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,212,5186.72,212000,-0.58,0,-10,410.72


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-08_16-36-47
  done: false
  episode_len_mean: 411.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.58
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 560
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7445227874649896
          entropy_coeff: 0.009999999999999998
          kl: 0.01691609382672219
          policy_loss: -0.055647748584548636
          total_loss: -0.06335377279255125
          vf_explained_var: 0.3805069327354431
          vf_loss: 0.005457310229798572
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  iterations_since_restore: 213
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,213,5206.34,213000,-0.58,0,-10,411.29


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-08_16-37-08
  done: false
  episode_len_mean: 414.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.58
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 563
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9291408485836452
          entropy_coeff: 0.009999999999999998
          kl: 0.015038454542610009
          policy_loss: 0.03905247420900398
          total_loss: 0.027119079149431652
          vf_explained_var: 0.07517663389444351
          vf_loss: 0.0035514039284963573
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  iterations_since_restore: 214
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,214,5227.01,214000,-0.58,0,-10,414.94


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-08_16-37-29
  done: false
  episode_len_mean: 414.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 565
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9475851588779025
          entropy_coeff: 0.009999999999999998
          kl: 0.018350366571047882
          policy_loss: -0.05464676237251195
          total_loss: -0.030754617053187557
          vf_explained_var: -0.18536990880966187
          vf_loss: 0.0387230595599653
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iterations_since_restore: 215
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,215,5248.21,215000,-0.6,0,-10,414.8


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-08_16-37-54
  done: false
  episode_len_mean: 413.98
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: -0.56
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 568
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.895910045835707
          entropy_coeff: 0.009999999999999998
          kl: 0.010895333507071924
          policy_loss: -0.10083830257256826
          total_loss: -0.04733137521478865
          vf_explained_var: -0.43112173676490784
          vf_loss: 0.06970814282540232
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterations_since_restore: 216
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,216,5273.53,216000,-0.56,4,-10,413.98




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-08_16-38-37
  done: false
  episode_len_mean: 411.43
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: -0.56
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 571
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.851579315132565
          entropy_coeff: 0.009999999999999998
          kl: 0.017188013461675103
          policy_loss: -0.0237536347988579
          total_loss: -0.02768353931605816
          vf_explained_var: -0.6210156083106995
          vf_loss: 0.010235172603279353
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iterations_since_restore: 217
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,217,5316.28,217000,-0.56,4,-10,411.43


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-08_16-38-57
  done: false
  episode_len_mean: 412.0
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: -0.56
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 573
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9049794289800857
          entropy_coeff: 0.009999999999999998
          kl: 0.008619140566857711
          policy_loss: -0.17532596877879567
          total_loss: -0.1866358315779103
          vf_explained_var: 0.044381819665431976
          vf_loss: 0.005558209726586938
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  iterations_since_restore: 218
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,218,5336.1,218000,-0.56,4,-10,412


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-08_16-39-19
  done: false
  episode_len_mean: 412.8
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: -0.54
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 576
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.901395144727495
          entropy_coeff: 0.009999999999999998
          kl: 0.02940529954112389
          policy_loss: -0.06739611004789671
          total_loss: 0.08129108018345303
          vf_explained_var: 0.052619755268096924
          vf_loss: 0.1602579291884063
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iterations_since_restore: 219
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,219,5358.09,219000,-0.54,4,-10,412.8


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-08_16-39-42
  done: false
  episode_len_mean: 411.78
  episode_media: {}
  episode_reward_max: 5.0
  episode_reward_mean: -0.49
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 579
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.859428228272332
          entropy_coeff: 0.009999999999999998
          kl: 0.011907475475780352
          policy_loss: 0.087752657259504
          total_loss: 0.09260056347896656
          vf_explained_var: 0.26096105575561523
          vf_loss: 0.018921068838487067
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 220
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,220,5381.36,220000,-0.49,5,-10,411.78


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-08_16-40-03
  done: false
  episode_len_mean: 411.24
  episode_media: {}
  episode_reward_max: 5.0
  episode_reward_mean: -0.49
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 581
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8811166220241122
          entropy_coeff: 0.009999999999999998
          kl: 0.01179806042237986
          policy_loss: -0.019882534133891263
          total_loss: 0.034896889556613235
          vf_explained_var: 0.0766243115067482
          vf_loss: 0.06911101409544547
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  iterations_since_restore: 221
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,221,5401.86,221000,-0.49,5,-10,411.24


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-08_16-40-27
  done: false
  episode_len_mean: 407.42
  episode_media: {}
  episode_reward_max: 5.0
  episode_reward_mean: -0.53
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 584
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.5677875571780735
          entropy_coeff: 0.009999999999999998
          kl: 0.018271048034552925
          policy_loss: -0.034873166845904455
          total_loss: 0.08265852077553669
          vf_explained_var: 0.6186473965644836
          vf_loss: 0.12627227467795213
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iterations_since_restore: 222
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,222,5426.22,222000,-0.53,5,-10,407.42


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-08_16-40-47
  done: false
  episode_len_mean: 407.68
  episode_media: {}
  episode_reward_max: 5.0
  episode_reward_mean: -0.63
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 586
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8624824894799126
          entropy_coeff: 0.009999999999999998
          kl: 0.01578647029581774
          policy_loss: 0.04345868130524953
          total_loss: 0.10628441936439938
          vf_explained_var: -0.1018676832318306
          vf_loss: 0.07545664731620086
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  iterations_since_restore: 223
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,223,5445.79,223000,-0.63,5,-10,407.68


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-08_16-41-10
  done: false
  episode_len_mean: 406.42
  episode_media: {}
  episode_reward_max: 5.0
  episode_reward_mean: -0.65
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 589
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8810475521617465
          entropy_coeff: 0.009999999999999998
          kl: 0.016604036114689864
          policy_loss: -0.0801762249527706
          total_loss: 0.22793832366458244
          vf_explained_var: 0.5855964422225952
          vf_loss: 0.3206206783445345
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 224
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,224,5469.66,224000,-0.65,5,-10,406.42


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-08_16-41-31
  done: false
  episode_len_mean: 406.31
  episode_media: {}
  episode_reward_max: 5.0
  episode_reward_mean: -0.61
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 592
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.055989701218075
          entropy_coeff: 0.009999999999999998
          kl: 0.009056903396950835
          policy_loss: 0.07728389021423128
          total_loss: 0.07544606971657938
          vf_explained_var: -0.33321937918663025
          vf_loss: 0.0152832825978597
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterations_since_restore: 225
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,225,5490.69,225000,-0.61,5,-10,406.31


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-08_16-41-55
  done: false
  episode_len_mean: 404.8
  episode_media: {}
  episode_reward_max: 5.0
  episode_reward_mean: -0.73
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 595
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8565903902053833
          entropy_coeff: 0.009999999999999998
          kl: 0.012045721889927958
          policy_loss: 0.03179726716544893
          total_loss: 0.2190544192989667
          vf_explained_var: 0.7154343724250793
          vf_loss: 0.20124944316016302
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations_since_restore: 226
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,226,5514.29,226000,-0.73,5,-10,404.8


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-08_16-42-20
  done: false
  episode_len_mean: 404.62
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.64
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 598
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.876578688621521
          entropy_coeff: 0.009999999999999998
          kl: 0.016889471054635764
          policy_loss: 0.053758107125759125
          total_loss: 0.18559402165313563
          vf_explained_var: 0.7790495157241821
          vf_loss: 0.1441889791438977
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterations_since_restore: 227
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,227,5539.54,227000,-0.64,6,-10,404.62




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-08_16-43-04
  done: false
  episode_len_mean: 401.14
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.62
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 601
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8527931213378905
          entropy_coeff: 0.009999999999999998
          kl: 0.00998217138412719
          policy_loss: -0.09111714793576134
          total_loss: -0.0788366542922126
          vf_explained_var: 0.40657779574394226
          vf_loss: 0.027018319349735975
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iterations_since_restore: 228
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,228,5583.09,228000,-0.62,6,-10,401.14


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-08_16-43-28
  done: false
  episode_len_mean: 397.26
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.58
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 604
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9647395862473382
          entropy_coeff: 0.009999999999999998
          kl: 0.015198395936323718
          policy_loss: -0.09301686849859025
          total_loss: -0.0071224066325359875
          vf_explained_var: 0.33109092712402344
          vf_loss: 0.09977121698256168
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations_since_restore: 229
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,229,5607.63,229000,-0.58,6,-10,397.26


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-08_16-43-54
  done: false
  episode_len_mean: 396.28
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.57
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 607
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8147312813334995
          entropy_coeff: 0.009999999999999998
          kl: 0.015893873904147544
          policy_loss: -0.10019887275993825
          total_loss: 0.03933009525967969
          vf_explained_var: 0.23511174321174622
          vf_loss: 0.15164157792977576
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations_since_restore: 230
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,230,5633.29,230000,-0.57,6,-10,396.28


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-08_16-44-21
  done: false
  episode_len_mean: 392.61
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.62
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 610
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.6818129195107354
          entropy_coeff: 0.009999999999999998
          kl: 0.018798413445011024
          policy_loss: 0.0602013542316854
          total_loss: 0.3516418524293436
          vf_explained_var: 0.21419993042945862
          vf_loss: 0.3011210998520255
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iterations_since_restore: 231
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,231,5660.2,231000,-0.62,6,-10,392.61


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-08_16-44-42
  done: false
  episode_len_mean: 391.03
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.67
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 612
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7011392765574984
          entropy_coeff: 0.009999999999999998
          kl: 0.026788343345382347
          policy_loss: -0.034251856638325584
          total_loss: 0.33849589741892283
          vf_explained_var: 0.8358519077301025
          vf_loss: 0.3795879511369599
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations_since_restore: 232
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,232,5681.21,232000,-0.67,6,-10,391.03


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-08_16-45-06
  done: false
  episode_len_mean: 391.9
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.67
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 615
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4873423059781392
          entropy_coeff: 0.009999999999999998
          kl: 0.012992772660528888
          policy_loss: -0.17804868585533565
          total_loss: 0.12697169677250916
          vf_explained_var: 0.8173605799674988
          vf_loss: 0.3124940147002538
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  iterations_since_restore: 233
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,233,5704.94,233000,-0.67,6,-10,391.9


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-08_16-45-24
  done: false
  episode_len_mean: 391.99
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.8
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 617
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.017648312780592
          entropy_coeff: 0.009999999999999998
          kl: 0.016783737684333923
          policy_loss: -0.14429269458891616
          total_loss: 0.0679180816643768
          vf_explained_var: 0.7145189642906189
          vf_loss: 0.22282839661671056
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  iterations_since_restore: 234
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,234,5723.5,234000,-0.8,6,-12,391.99


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-08_16-45-46
  done: false
  episode_len_mean: 392.42
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.69
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 620
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8345951941278247
          entropy_coeff: 0.009999999999999998
          kl: 0.018246121394657946
          policy_loss: -0.07761490320165952
          total_loss: 0.2510639583071073
          vf_explained_var: 0.6998655200004578
          vf_loss: 0.33663307713965573
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  iterations_since_restore: 235
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,235,5744.53,235000,-0.69,6,-12,392.42


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-08_16-46-08
  done: false
  episode_len_mean: 390.46
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.63
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 623
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6439431620968712
          entropy_coeff: 0.009999999999999998
          kl: 0.008985174371432928
          policy_loss: 0.05297842918015602
          total_loss: 0.25325900233971577
          vf_explained_var: 0.7879564762115479
          vf_loss: 0.2116026633315616
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  iterations_since_restore: 236
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,236,5767.16,236000,-0.63,6,-12,390.46


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-08_16-46-31
  done: false
  episode_len_mean: 389.03
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.76
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 625
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.580175953441196
          entropy_coeff: 0.009999999999999998
          kl: 0.015631738460364926
          policy_loss: -0.05812002279692226
          total_loss: 0.26530669720636474
          vf_explained_var: 0.7134262323379517
          vf_loss: 0.3303257140848372
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  iterations_since_restore: 237
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,237,5790.13,237000,-0.76,6,-12,389.03


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-08_16-46-52
  done: false
  episode_len_mean: 387.71
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.78
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 628
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8380936834547255
          entropy_coeff: 0.009999999999999998
          kl: 0.010603259492902773
          policy_loss: -0.029681210302644305
          total_loss: 0.14243688020441267
          vf_explained_var: 0.683272659778595
          vf_loss: 0.18446013507329756
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
  iterations_since_restore: 238
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,238,5811.27,238000,-0.78,6,-12,387.71




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-08_16-47-30
  done: false
  episode_len_mean: 387.27
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.72
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 630
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.049943443139394
          entropy_coeff: 0.009999999999999998
          kl: 0.014308629651735119
          policy_loss: -0.132966182132562
          total_loss: 0.08910452988412645
          vf_explained_var: 0.28676483035087585
          vf_loss: 0.23442092945592272
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  iterations_since_restore: 239
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,239,5849.2,239000,-0.72,6,-12,387.27


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-08_16-47-59
  done: false
  episode_len_mean: 383.5
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.6
  episode_reward_min: -12.0
  episodes_this_iter: 4
  episodes_total: 634
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1310551206270854
          entropy_coeff: 0.009999999999999998
          kl: 0.01371288642161201
          policy_loss: -0.047220775816175675
          total_loss: 0.30060601234436035
          vf_explained_var: 0.8321429491043091
          vf_loss: 0.351327418618732
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 240
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,240,5877.66,240000,-0.6,6,-12,383.5


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-08_16-48-27
  done: false
  episode_len_mean: 379.81
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.49
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 637
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4080556803279454
          entropy_coeff: 0.009999999999999998
          kl: 0.012955213830603699
          policy_loss: -0.11946879774332046
          total_loss: 0.10343332896526489
          vf_explained_var: 0.8497580885887146
          vf_loss: 0.2296042886045244
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  iterations_since_restore: 241
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,241,5906.1,241000,-0.49,6,-12,379.81


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-08_16-48-45
  done: false
  episode_len_mean: 378.11
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.49
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 639
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1878535853491887
          entropy_coeff: 0.009999999999999998
          kl: 0.012939603733027915
          policy_loss: -0.0309298704067866
          total_loss: 0.028567548634277448
          vf_explained_var: 0.4372209310531616
          vf_loss: 0.07400644538510177
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iterations_since_restore: 242
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,242,5923.93,242000,-0.49,6,-12,378.11


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-08_16-49-14
  done: false
  episode_len_mean: 373.77
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.35
  episode_reward_min: -12.0
  episodes_this_iter: 4
  episodes_total: 643
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2171195069948832
          entropy_coeff: 0.009999999999999998
          kl: 0.006592684298368094
          policy_loss: -0.09504106636676524
          total_loss: 0.17169238535894288
          vf_explained_var: 0.828678548336029
          vf_loss: 0.27514990650945237
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  iterations_since_restore: 243
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,243,5952.46,243000,-0.35,6,-12,373.77


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-08_16-49-33
  done: false
  episode_len_mean: 373.97
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.36
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 645
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0406707339816625
          entropy_coeff: 0.009999999999999998
          kl: 0.012658612691448095
          policy_loss: -0.10617785942223337
          total_loss: 0.20115311245123546
          vf_explained_var: 0.26404425501823425
          vf_loss: 0.3205282046770056
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  iterations_since_restore: 244
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,244,5971.71,244000,-0.36,6,-12,373.97


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-08_16-49-56
  done: false
  episode_len_mean: 372.41
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.38
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 648
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7974757300482855
          entropy_coeff: 0.009999999999999998
          kl: 0.019668378792443836
          policy_loss: -0.06362537137336201
          total_loss: 0.3461078782462411
          vf_explained_var: 0.7757164239883423
          vf_loss: 0.4165062384472953
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
  iterations_since_restore: 245
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,245,5995.16,245000,-0.38,6,-12,372.41


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-08_16-50-19
  done: false
  episode_len_mean: 371.39
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.3
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 651
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8975734935866462
          entropy_coeff: 0.009999999999999998
          kl: 0.017017376975614024
          policy_loss: -0.1731310925549931
          total_loss: 0.12768534090783862
          vf_explained_var: 0.28759440779685974
          vf_loss: 0.31010023990852964
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  iterations_since_restore: 246
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,246,6018.05,246000,-0.3,6,-12,371.39


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-08_16-50-40
  done: false
  episode_len_mean: 371.42
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.29
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 653
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9911890321307713
          entropy_coeff: 0.009999999999999998
          kl: 0.01573189729968413
          policy_loss: -0.05977326950265301
          total_loss: 0.21039259785579312
          vf_explained_var: 0.7785248756408691
          vf_loss: 0.28111794735822415
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
  iterations_since_restore: 247
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,247,6038.41,247000,-0.29,6,-12,371.42


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-08_16-51-00
  done: false
  episode_len_mean: 370.3
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.16
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 655
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.772161340713501
          entropy_coeff: 0.009999999999999998
          kl: 0.019692503753865588
          policy_loss: -0.12998621857000722
          total_loss: 0.3163806205822362
          vf_explained_var: 0.2753642797470093
          vf_loss: 0.4528729597727458
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  iterations_since_restore: 248
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,248,6059.14,248000,-0.16,6,-12,370.3


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-08_16-51-22
  done: false
  episode_len_mean: 371.28
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.13
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 658
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6645124978489345
          entropy_coeff: 0.009999999999999998
          kl: 0.011332433796854207
          policy_loss: -0.22815210413601664
          total_loss: -0.06424168418678973
          vf_explained_var: 0.7862867712974548
          vf_loss: 0.17410136581295066
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  iterations_since_restore: 249
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,249,6080.67,249000,-0.13,6,-12,371.28




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-08_16-52-02
  done: false
  episode_len_mean: 369.77
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.12
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 660
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7504501673910353
          entropy_coeff: 0.009999999999999998
          kl: 0.009429414877732849
          policy_loss: -0.1366593012172315
          total_loss: -0.05764403724008137
          vf_explained_var: 0.7054709792137146
          vf_loss: 0.09114942015666101
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations_since_restore: 250
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,250,6120.43,250000,-0.12,6,-12,369.77


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-08_16-52-24
  done: false
  episode_len_mean: 368.16
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.0
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 663
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6108633756637574
          entropy_coeff: 0.009999999999999998
          kl: 0.007152368797762934
          policy_loss: -0.025463758181366654
          total_loss: 0.02756546904436416
          vf_explained_var: 0.548769474029541
          vf_loss: 0.06506436462902153
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
  iterations_since_restore: 251
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,251,6142.68,251000,0,6,-12,368.16




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-08_16-53-13
  done: false
  episode_len_mean: 364.33
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 0.22
  episode_reward_min: -12.0
  episodes_this_iter: 4
  episodes_total: 667
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.961530007256402
          entropy_coeff: 0.009999999999999998
          kl: 0.010064390597585791
          policy_loss: -0.08996925188435448
          total_loss: 0.09097933578822348
          vf_explained_var: 0.5959858894348145
          vf_loss: 0.19483190369792283
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  iterations_since_restore: 252
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,252,6191.48,252000,0.22,9,-12,364.33


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-08_16-53-32
  done: false
  episode_len_mean: 366.43
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 0.3
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 669
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.017258707682292
          entropy_coeff: 0.009999999999999998
          kl: 0.00972956972623041
          policy_loss: -0.06855903168519338
          total_loss: 0.16708281661073368
          vf_explained_var: 0.7516634464263916
          vf_loss: 0.25027314480394125
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
  iterations_since_restore: 253
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,253,6210.59,253000,0.3,9,-12,366.43


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-08_16-53-50
  done: false
  episode_len_mean: 369.65
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 0.37
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 671
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1682334303855897
          entropy_coeff: 0.009999999999999998
          kl: 0.0072958908766151765
          policy_loss: -0.06329788176549805
          total_loss: -0.015817909066875777
          vf_explained_var: 0.17522971332073212
          vf_loss: 0.06500706690777507
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
  iterations_since_restore: 254
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,254,6228.58,254000,0.37,9,-12,369.65


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-08_16-54-10
  done: false
  episode_len_mean: 368.77
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 0.38
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 673
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8513628310627408
          entropy_coeff: 0.009999999999999998
          kl: 0.020913452063980643
          policy_loss: 0.08072423011892371
          total_loss: 0.3357741917586989
          vf_explained_var: 0.5043972730636597
          vf_loss: 0.2616527250657479
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  iterations_since_restore: 255
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,255,6249,255000,0.38,9,-12,368.77


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-08_16-54-30
  done: false
  episode_len_mean: 370.57
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 0.5
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 676
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9234434790081447
          entropy_coeff: 0.009999999999999998
          kl: 0.008088855558863588
          policy_loss: -0.03454114720225334
          total_loss: 0.09668780721517073
          vf_explained_var: 0.48950493335723877
          vf_loss: 0.14355310350656508
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  iterations_since_restore: 256
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,256,6268.13,256000,0.5,9,-12,370.57


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-08_16-54-48
  done: false
  episode_len_mean: 372.05
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 0.62
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 678
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 2.237511506345537
          entropy_coeff: 0.009999999999999998
          kl: 0.005658520609898016
          policy_loss: 0.019824627372953627
          total_loss: 0.019548669043514464
          vf_explained_var: 0.42332667112350464
          vf_loss: 0.01726509854197502
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
  iterations_since_restore: 257
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,257,6286.1,257000,0.62,9,-12,372.05


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-08_16-55-06
  done: false
  episode_len_mean: 373.65
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 0.73
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 680
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 2.263381263944838
          entropy_coeff: 0.009999999999999998
          kl: 0.006258942850286727
          policy_loss: -0.1287168186571863
          total_loss: -0.10046021391948064
          vf_explained_var: 0.9776836633682251
          vf_loss: 0.045543422032561565
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000
  iterations_since_restore: 258
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,258,6304.43,258000,0.73,9,-12,373.65


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-08_16-55-23
  done: false
  episode_len_mean: 375.24
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 0.83
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 682
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 2.2999992847442625
          entropy_coeff: 0.009999999999999998
          kl: 0.005082127152250631
          policy_loss: -0.02072404854827457
          total_loss: -0.004647849003473917
          vf_explained_var: 0.3160251975059509
          vf_loss: 0.03473454787033713
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000
  iterations_since_restore: 259
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,259,6321.96,259000,0.83,9,-12,375.24


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-08_16-55-45
  done: false
  episode_len_mean: 376.39
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 0.98
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 685
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8472361935509576
          entropy_coeff: 0.009999999999999998
          kl: 0.011267509848945645
          policy_loss: -0.033380376588967114
          total_loss: 0.11613969136443403
          vf_explained_var: 0.7170355319976807
          vf_loss: 0.15836663184066613
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 260
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,260,6343.41,260000,0.98,9,-12,376.39


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-08_16-56-04
  done: false
  episode_len_mean: 377.59
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 1.22
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 687
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.854296875
          cur_lr: 5.000000000000001e-05
          entropy: 2.3697206166055467
          entropy_coeff: 0.009999999999999998
          kl: 0.003639991396104768
          policy_loss: -0.09570948692659537
          total_loss: -0.05693870653501815
          vf_explained_var: 0.3004295527935028
          vf_loss: 0.05935835181218055
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000
  iterations_since_restore: 261
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,261,6362.04,261000,1.22,9,-12,377.59


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-08_16-56-26
  done: false
  episode_len_mean: 378.49
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 1.41
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 690
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8614953676859538
          entropy_coeff: 0.009999999999999998
          kl: 0.011688945501982254
          policy_loss: -0.06737923117147551
          total_loss: -0.006533927346269289
          vf_explained_var: 0.8216189742088318
          vf_loss: 0.07446734070415711
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000
  iterations_since_restore: 262
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,262,6384.48,262000,1.41,9,-12,378.49


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-08_16-56-53
  done: false
  episode_len_mean: 375.6
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 1.54
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 693
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.561756130721834
          entropy_coeff: 0.009999999999999998
          kl: 0.009161160748305846
          policy_loss: -0.024014590432246526
          total_loss: 0.0254985015011496
          vf_explained_var: 0.5255440473556519
          vf_loss: 0.06121747959405184
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000
  iterations_since_restore: 263
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,263,6411.82,263000,1.54,9,-12,375.6




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-08_16-57-30
  done: false
  episode_len_mean: 376.23
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 1.78
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 696
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.034387781884935
          entropy_coeff: 0.009999999999999998
          kl: 0.010597390520622281
          policy_loss: -0.07645262396997876
          total_loss: -0.00462298807170656
          vf_explained_var: 0.9667097330093384
          vf_loss: 0.08764685311147737
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  iterations_since_restore: 264
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,264,6448.65,264000,1.78,9,-12,376.23


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-08_16-57-49
  done: false
  episode_len_mean: 378.7
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 1.79
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 698
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.982243882285224
          entropy_coeff: 0.009999999999999998
          kl: 0.011979668714064574
          policy_loss: -0.22448963075876235
          total_loss: -0.19656957512100537
          vf_explained_var: 0.47613295912742615
          vf_loss: 0.04262539359430472
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
  iterations_since_restore: 265
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,265,6467.53,265000,1.79,9,-12,378.7


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-08_16-58-06
  done: false
  episode_len_mean: 382.23
  episode_media: {}
  episode_reward_max: 9.0
  episode_reward_mean: 1.88
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 700
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.45582228369183
          entropy_coeff: 0.009999999999999998
          kl: 0.007308662315843822
          policy_loss: 0.03605615645647049
          total_loss: 0.029596983972522948
          vf_explained_var: 0.9347833395004272
          vf_loss: 0.014977166676221208
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
  iterations_since_restore: 266
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,266,6484.6,266000,1.88,9,-12,382.23




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-08_16-59-04
  done: false
  episode_len_mean: 380.87
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.11
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 703
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9631349192725287
          entropy_coeff: 0.009999999999999998
          kl: 0.011278317337268282
          policy_loss: -0.08418158723248376
          total_loss: 0.006324106454849243
          vf_explained_var: 0.9494575262069702
          vf_loss: 0.10531952360437977
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000
  iterations_since_restore: 267
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,267,6542.03,267000,2.11,10,-12,380.87


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-08_16-59-22
  done: false
  episode_len_mean: 384.12
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.22
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 705
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.126993641588423
          entropy_coeff: 0.009999999999999998
          kl: 0.008837982711752524
          policy_loss: 0.014880634616646501
          total_loss: 0.07535214939465125
          vf_explained_var: 0.7154663801193237
          vf_loss: 0.07796632051467896
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  iterations_since_restore: 268
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,268,6560.66,268000,2.22,10,-12,384.12


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-08_16-59-44
  done: false
  episode_len_mean: 387.27
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.36
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 708
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.157940740386645
          entropy_coeff: 0.009999999999999998
          kl: 0.030902537346837632
          policy_loss: -0.026766697896851432
          total_loss: 0.20947781412137878
          vf_explained_var: 0.9403523802757263
          vf_loss: 0.23462393906795317
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000
  iterations_since_restore: 269
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,269,6582.37,269000,2.36,10,-12,387.27


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-08_17-00-04
  done: false
  episode_len_mean: 388.82
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.55
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 710
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.1198598080211215
          entropy_coeff: 0.009999999999999998
          kl: 0.007467050940542934
          policy_loss: -0.13492941810852951
          total_loss: -0.13216598100132412
          vf_explained_var: 0.4515605568885803
          vf_loss: 0.01917772547652324
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  iterations_since_restore: 270
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,270,6602.37,270000,2.55,10,-12,388.82


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-08_17-00-21
  done: false
  episode_len_mean: 390.54
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.67
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 712
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.093434045049879
          entropy_coeff: 0.009999999999999998
          kl: 0.016067724015139034
          policy_loss: -0.14251029822561476
          total_loss: 0.359278260750903
          vf_explained_var: 0.644807755947113
          vf_loss: 0.5124279564453496
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  iterations_since_restore: 271
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,271,6619.23,271000,2.67,10,-12,390.54


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-08_17-00-43
  done: false
  episode_len_mean: 391.71
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.75
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 715
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6259966995981006
          entropy_coeff: 0.009999999999999998
          kl: 0.013058094114830925
          policy_loss: -0.03388712008794149
          total_loss: 0.6304652776776088
          vf_explained_var: 0.643039345741272
          vf_loss: 0.6722457459403409
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  iterations_since_restore: 272
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,272,6641.43,272000,2.75,10,-12,391.71


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-08_17-01-01
  done: false
  episode_len_mean: 392.22
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.96
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 717
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8434553451008266
          entropy_coeff: 0.009999999999999998
          kl: 0.01311815820342874
          policy_loss: -0.04214322335190243
          total_loss: 0.11137437563803461
          vf_explained_var: 0.44581323862075806
          vf_loss: 0.1635470473104053
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
  iterations_since_restore: 273
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,273,6658.88,273000,2.96,10,-12,392.22




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-08_17-01-56
  done: false
  episode_len_mean: 390.23
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.01
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 720
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8143129348754883
          entropy_coeff: 0.009999999999999998
          kl: 0.010094477474770864
          policy_loss: 0.10504164128667778
          total_loss: 0.39572473011083076
          vf_explained_var: 0.5539809465408325
          vf_loss: 0.3023584625373284
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  iterations_since_restore: 274
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,274,6714.42,274000,3.01,10,-12,390.23


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-08_17-02-14
  done: false
  episode_len_mean: 393.9
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.0
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 722
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8575262599521214
          entropy_coeff: 0.009999999999999998
          kl: 0.015496967838168117
          policy_loss: 0.004260279072655572
          total_loss: 0.3433641817420721
          vf_explained_var: 0.4517231285572052
          vf_loss: 0.3477499097585678
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
  iterations_since_restore: 275
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,275,6732,275000,3,10,-12,393.9


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-08_17-02-32
  done: false
  episode_len_mean: 394.64
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.17
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 724
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9086019582218594
          entropy_coeff: 0.009999999999999998
          kl: 0.010601476579059346
          policy_loss: -0.06571758410169018
          total_loss: 0.11838294756081369
          vf_explained_var: 0.5173894762992859
          vf_loss: 0.19639394476802813
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
  iterations_since_restore: 276
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,276,6750.02,276000,3.17,10,-12,394.64


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-08_17-02-50
  done: false
  episode_len_mean: 395.9
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.37
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 726
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4981771111488342
          entropy_coeff: 0.009999999999999998
          kl: 0.010381009084773076
          policy_loss: -0.052573849426375496
          total_loss: 0.11851866567093465
          vf_explained_var: 0.7676146030426025
          vf_loss: 0.17942294027242395
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
  iterations_since_restore: 277
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,277,6768.16,277000,3.37,10,-12,395.9


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-08_17-03-06
  done: false
  episode_len_mean: 398.31
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.42
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 728
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.671782500214047
          entropy_coeff: 0.009999999999999998
          kl: 0.011988570713336744
          policy_loss: -0.02788255765206284
          total_loss: 0.3645545883725087
          vf_explained_var: 0.8168433308601379
          vf_loss: 0.40147361738814247
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000
  iterations_since_restore: 278
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,278,6784.48,278000,3.42,10,-12,398.31


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-08_17-03-25
  done: false
  episode_len_mean: 399.9
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.48
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 730
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.08619571129481
          entropy_coeff: 0.009999999999999998
          kl: 0.009402888604078343
          policy_loss: -0.028131017254458532
          total_loss: 0.13264850390454133
          vf_explained_var: 0.8665416240692139
          vf_loss: 0.1756168363822831
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
  iterations_since_restore: 279
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,279,6802.69,279000,3.48,10,-12,399.9


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-08_17-03-43
  done: false
  episode_len_mean: 403.29
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.55
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 733
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.0447369204627144
          entropy_coeff: 0.009999999999999998
          kl: 0.007263089050159345
          policy_loss: -0.057497191429138186
          total_loss: 0.1700943077603976
          vf_explained_var: 0.8311173915863037
          vf_loss: 0.24338524701694647
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 280
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,280,6820.75,280000,3.55,10,-12,403.29


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-08_17-03-59
  done: false
  episode_len_mean: 407.29
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.65
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 735
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7751004238923391
          entropy_coeff: 0.009999999999999998
          kl: 0.008073381467086636
          policy_loss: -0.13437647303152417
          total_loss: 0.08380766369195448
          vf_explained_var: 0.07794912904500961
          vf_loss: 0.23076234391580025
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
  iterations_since_restore: 281
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,281,6836.99,281000,3.65,10,-12,407.29


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-08_17-04-23
  done: false
  episode_len_mean: 407.83
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.69
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 738
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2836639821529388
          entropy_coeff: 0.009999999999999998
          kl: 0.01902391100382693
          policy_loss: -0.11470280906392469
          total_loss: 0.11782637081212467
          vf_explained_var: 0.7578635215759277
          vf_loss: 0.2331767681365212
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000
  iterations_since_restore: 282
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,282,6861.3,282000,3.69,10,-12,407.83


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-08_17-04-44
  done: false
  episode_len_mean: 406.99
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.71
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 740
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5717877957555983
          entropy_coeff: 0.009999999999999998
          kl: 0.013896964149954666
          policy_loss: 0.015649333596229553
          total_loss: 0.17673367477125593
          vf_explained_var: 0.8516474962234497
          vf_loss: 0.16789811924099923
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  iterations_since_restore: 283
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,283,6881.65,283000,3.71,10,-12,406.99


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-08_17-05-00
  done: false
  episode_len_mean: 411.18
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.79
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 742
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.0050316744380527
          entropy_coeff: 0.009999999999999998
          kl: 0.012750110969160408
          policy_loss: -0.04124150783237484
          total_loss: 0.005681258170968957
          vf_explained_var: 0.6155575513839722
          vf_loss: 0.05880379769951105
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  iterations_since_restore: 284
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,284,6897.69,284000,3.79,10,-12,411.18


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-08_17-05-17
  done: false
  episode_len_mean: 413.18
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.83
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 744
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6482304414113362
          entropy_coeff: 0.009999999999999998
          kl: 0.007466761918287388
          policy_loss: -0.02754914557768239
          total_loss: 0.1465708757026328
          vf_explained_var: 0.39014294743537903
          vf_loss: 0.18581819995823834
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  iterations_since_restore: 285
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,285,6914.55,285000,3.83,10,-12,413.18


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-08_17-05-32
  done: false
  episode_len_mean: 416.08
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.09
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 746
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.85783109664917
          entropy_coeff: 0.009999999999999998
          kl: 0.011462179847189762
          policy_loss: -0.07761013760334916
          total_loss: 0.04095861537175046
          vf_explained_var: 0.8758978843688965
          vf_loss: 0.12980298702087667
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  iterations_since_restore: 286
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,286,6929.87,286000,4.09,10,-12,416.08


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-08_17-05-50
  done: false
  episode_len_mean: 418.01
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.08
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 748
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8101416852739123
          entropy_coeff: 0.009999999999999998
          kl: 0.009242923066657922
          policy_loss: -0.037875067525439794
          total_loss: 0.05734352775745922
          vf_explained_var: 0.8556803464889526
          vf_loss: 0.10739786091984974
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000
  iterations_since_restore: 287
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,287,6947.4,287000,4.08,10,-12,418.01




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-08_17-06-22
  done: false
  episode_len_mean: 421.14
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.15
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 750
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2093524992465974
          entropy_coeff: 0.009999999999999998
          kl: 0.01362787750493442
          policy_loss: -0.032030496001243594
          total_loss: 0.3852212616552909
          vf_explained_var: 0.6145175099372864
          vf_loss: 0.42061359302865137
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000
  iterations_since_restore: 288
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,288,6979.57,288000,4.15,10,-12,421.14


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-08_17-06-39
  done: false
  episode_len_mean: 423.11
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.37
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 752
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7196778376897177
          entropy_coeff: 0.009999999999999998
          kl: 0.011095439330245692
          policy_loss: -0.0015987845758597056
          total_loss: 0.4520568241675695
          vf_explained_var: 0.7573316693305969
          vf_loss: 0.4637432807849513
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000
  iterations_since_restore: 289
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,289,6996.52,289000,4.37,10,-2,423.11


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-08_17-06-56
  done: false
  episode_len_mean: 424.1
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.41
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 754
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5524844884872437
          entropy_coeff: 0.009999999999999998
          kl: 0.017093273249784918
          policy_loss: -0.07503005547655953
          total_loss: 0.1031030338878433
          vf_explained_var: 0.5990929007530212
          vf_loss: 0.18270588542024294
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  iterations_since_restore: 290
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,290,7013.57,290000,4.41,10,-2,424.1


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-08_17-07-12
  done: false
  episode_len_mean: 426.79
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.4
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 756
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5873633126417797
          entropy_coeff: 0.009999999999999998
          kl: 0.012023894825905835
          policy_loss: 0.1009873017668724
          total_loss: 0.2801448056474328
          vf_explained_var: 0.7820607423782349
          vf_loss: 0.18732715919613838
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
  iterations_since_restore: 291
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,291,7029.82,291000,4.4,10,-5,426.79


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-08_17-07-29
  done: false
  episode_len_mean: 427.55
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.51
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 758
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8243105265829298
          entropy_coeff: 0.009999999999999998
          kl: 0.012764947236982889
          policy_loss: -0.0894015277011527
          total_loss: 0.11639550094389253
          vf_explained_var: 0.7165001034736633
          vf_loss: 0.21586134129514298
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  iterations_since_restore: 292
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,292,7047.21,292000,4.51,10,-5,427.55


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-08_17-07-46
  done: false
  episode_len_mean: 430.79
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.59
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 760
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9222833765877618
          entropy_coeff: 0.009999999999999998
          kl: 0.011699441298868605
          policy_loss: -0.13427579262190395
          total_loss: -0.05831376554237472
          vf_explained_var: 0.791107714176178
          vf_loss: 0.08768876268424922
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  iterations_since_restore: 293
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,293,7064.19,293000,4.59,10,-5,430.79


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-08_17-08-06
  done: false
  episode_len_mean: 431.41
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.58
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 763
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7536607093281216
          entropy_coeff: 0.009999999999999998
          kl: 0.0053725740308301
          policy_loss: -0.0702342356244723
          total_loss: 0.019368946469492384
          vf_explained_var: 0.7484950423240662
          vf_loss: 0.10369745660573244
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000
  iterations_since_restore: 294
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,294,7084.03,294000,4.58,10,-5,431.41


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-08_17-08-27
  done: false
  episode_len_mean: 430.72
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.53
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 765
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7835397316349877
          entropy_coeff: 0.009999999999999998
          kl: 0.013614154979025272
          policy_loss: 0.013771314918994904
          total_loss: 0.1416669194897016
          vf_explained_var: 0.8234504461288452
          vf_loss: 0.13700810329367716
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000
  iterations_since_restore: 295
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,295,7104.36,295000,4.53,10,-5,430.72




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-08_17-09-25
  done: false
  episode_len_mean: 435.54
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.46
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 767
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.399872339434094
          entropy_coeff: 0.009999999999999998
          kl: 0.01849792011192181
          policy_loss: -0.05921126852432887
          total_loss: 0.6911232378747728
          vf_explained_var: 0.40038952231407166
          vf_loss: 0.7524811956617568
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  iterations_since_restore: 296
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,296,7163.05,296000,4.46,10,-5,435.54


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-08_17-09-42
  done: false
  episode_len_mean: 436.89
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.52
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 770
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.1739113357332016
          entropy_coeff: 0.009999999999999998
          kl: 0.00665885349465262
          policy_loss: -0.05278146856774887
          total_loss: -0.022681471415691906
          vf_explained_var: 0.7821878790855408
          vf_loss: 0.04757263453470336
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000
  iterations_since_restore: 297
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,297,7179.95,297000,4.52,10,-5,436.89


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-08_17-09-59
  done: false
  episode_len_mean: 438.0
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.56
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 772
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.0523120548990037
          entropy_coeff: 0.009999999999999998
          kl: 0.009305711270799414
          policy_loss: 0.019430678255028196
          total_loss: 0.13734477882583937
          vf_explained_var: 0.7930223941802979
          vf_loss: 0.132474843516118
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000
  iterations_since_restore: 298
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,298,7196.74,298000,4.56,10,-5,438


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-08_17-10-17
  done: false
  episode_len_mean: 438.0
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.64
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 774
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.0918477972348533
          entropy_coeff: 0.009999999999999998
          kl: 0.014371694402279466
          policy_loss: 0.02854283567931917
          total_loss: 0.19496196516686015
          vf_explained_var: 0.6250301003456116
          vf_loss: 0.1781293334128956
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
  iterations_since_restore: 299
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,299,7215,299000,4.64,10,-5,438


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-08_17-10-37
  done: false
  episode_len_mean: 437.83
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.52
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 776
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7053083803918627
          entropy_coeff: 0.009999999999999998
          kl: 0.014541340531721483
          policy_loss: 0.015039909217092726
          total_loss: 0.44593113027513026
          vf_explained_var: 0.6258677840232849
          vf_loss: 0.43862734370761447
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 300
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,300,7234.31,300000,4.52,10,-5,437.83


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-08_17-10-57
  done: false
  episode_len_mean: 436.58
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.42
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 779
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3973762883080376
          entropy_coeff: 0.009999999999999998
          kl: 0.021650209352891952
          policy_loss: 0.023269061620036762
          total_loss: 0.3375765544672807
          vf_explained_var: 0.6210007071495056
          vf_loss: 0.31440947329004604
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000
  iterations_since_restore: 301
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,301,7254.34,301000,4.42,10,-5,436.58


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-08_17-11-13
  done: false
  episode_len_mean: 438.1
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.37
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 781
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4555010855197907
          entropy_coeff: 0.009999999999999998
          kl: 0.013902478328056588
          policy_loss: -0.07784572256108126
          total_loss: 0.17611882955663735
          vf_explained_var: 0.82222580909729
          vf_loss: 0.2551581107907825
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  iterations_since_restore: 302
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,302,7270.87,302000,4.37,10,-5,438.1


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-08_17-11-31
  done: false
  episode_len_mean: 438.07
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.34
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 783
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6764468206299676
          entropy_coeff: 0.009999999999999998
          kl: 0.010308828955836959
          policy_loss: -0.06345781824655003
          total_loss: 0.16307987835672166
          vf_explained_var: 0.12066594511270523
          vf_loss: 0.23339451336198383
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  iterations_since_restore: 303
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,303,7288.49,303000,4.34,10,-5,438.07


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-08_17-11-48
  done: false
  episode_len_mean: 440.58
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.37
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 785
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.763102040025923
          entropy_coeff: 0.009999999999999998
          kl: 0.008119829865529284
          policy_loss: -0.1506979723771413
          total_loss: 0.0021228470736079745
          vf_explained_var: 0.5869736671447754
          vf_loss: 0.1626480032586389
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  iterations_since_restore: 304
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,304,7305.5,304000,4.37,10,-5,440.58


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-08_17-12-08
  done: false
  episode_len_mean: 439.82
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.26
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 787
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.818873107433319
          entropy_coeff: 0.009999999999999998
          kl: 0.008433997480264329
          policy_loss: -0.08392707568903764
          total_loss: 0.04182145881156127
          vf_explained_var: 0.6062718629837036
          vf_loss: 0.13583148208757242
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
  iterations_since_restore: 305
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,305,7325.29,305000,4.26,10,-5,439.82


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-08_17-12-25
  done: false
  episode_len_mean: 442.64
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.28
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 789
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1713587946361965
          entropy_coeff: 0.009999999999999998
          kl: 0.008407599638416377
          policy_loss: -0.0565200075507164
          total_loss: 0.14410326172494226
          vf_explained_var: 0.5004452466964722
          vf_loss: 0.21425644839182495
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  iterations_since_restore: 306
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,306,7342.19,306000,4.28,10,-5,442.64


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-08_17-12-42
  done: false
  episode_len_mean: 443.29
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.14
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 791
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0772716893090144
          entropy_coeff: 0.009999999999999998
          kl: 0.007520558971429169
          policy_loss: -0.06054219546624356
          total_loss: 0.04891807559049792
          vf_explained_var: 0.16913650929927826
          vf_loss: 0.12300509895301527
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  iterations_since_restore: 307
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,307,7359.77,307000,4.14,10,-5,443.29


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-08_17-13-00
  done: false
  episode_len_mean: 448.79
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.15
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 794
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 2.061639404296875
          entropy_coeff: 0.009999999999999998
          kl: 0.0074991091050111945
          policy_loss: 0.0332437157837881
          total_loss: 0.11014671062843667
          vf_explained_var: 0.26194655895233154
          vf_loss: 0.09031211144497825
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
  iterations_since_restore: 308
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,308,7377.65,308000,4.15,10,-5,448.79


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-08_17-13-18
  done: false
  episode_len_mean: 450.07
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.99
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 796
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 2.040237573782603
          entropy_coeff: 0.009999999999999998
          kl: 0.015972232810895885
          policy_loss: 0.0387748707499769
          total_loss: 0.357785514742136
          vf_explained_var: 0.7350740432739258
          vf_loss: 0.3240623661213451
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
  iterations_since_restore: 309
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,309,7395.83,309000,3.99,10,-5,450.07




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-08_17-13-52
  done: false
  episode_len_mean: 451.29
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.98
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 798
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.1281428257624309
          entropy_coeff: 0.009999999999999998
          kl: 0.003212878993435374
          policy_loss: -0.22051541970835792
          total_loss: -0.16389482923679882
          vf_explained_var: -0.2827160656452179
          vf_loss: 0.06481417265410225
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  iterations_since_restore: 310
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,310,7428.97,310000,3.98,10,-5,451.29


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-08_17-14-09
  done: false
  episode_len_mean: 451.24
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.91
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 800
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2664751794603135
          entropy_coeff: 0.009999999999999998
          kl: 0.02015513130285054
          policy_loss: -0.05325193140241835
          total_loss: 0.012692615886529287
          vf_explained_var: -0.046519935131073
          vf_loss: 0.078923910876943
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
  iterations_since_restore: 311
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,311,7446.17,311000,3.91,10,-5,451.24


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-08_17-14-28
  done: false
  episode_len_mean: 450.97
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.89
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 802
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 2.2043986214531794
          entropy_coeff: 0.009999999999999998
          kl: 0.01295901329116802
          policy_loss: -0.11262529840071996
          total_loss: 0.09587334924274021
          vf_explained_var: 0.5735534429550171
          vf_loss: 0.2212016132970651
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  iterations_since_restore: 312
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,312,7465.75,312000,3.89,10,-5,450.97


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-08_17-14-46
  done: false
  episode_len_mean: 454.44
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.83
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 804
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 2.2183524356948006
          entropy_coeff: 0.009999999999999998
          kl: 0.009952094328739728
          policy_loss: -0.021815314557817246
          total_loss: 0.11908439675139057
          vf_explained_var: 0.1031179130077362
          vf_loss: 0.15590963564657917
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000
  iterations_since_restore: 313
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,313,7483.32,313000,3.83,10,-5,454.44


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-08_17-15-03
  done: false
  episode_len_mean: 452.74
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.85
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 806
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.9471333980560304
          entropy_coeff: 0.009999999999999998
          kl: 0.009259090992712524
          policy_loss: -0.07147814631462097
          total_loss: -0.032348400271601144
          vf_explained_var: 0.7541350722312927
          vf_loss: 0.051927002684937586
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000
  iterations_since_restore: 314
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,314,7499.91,314000,3.85,10,-5,452.74


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-08_17-15-21
  done: false
  episode_len_mean: 456.91
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.81
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 809
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.8393680744700962
          entropy_coeff: 0.009999999999999998
          kl: 0.012125391348487547
          policy_loss: 0.006796630968650182
          total_loss: 0.3588255542847845
          vf_explained_var: 0.7707377672195435
          vf_loss: 0.36168246641755103
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  iterations_since_restore: 315
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,315,7518.11,315000,3.81,10,-5,456.91


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-08_17-15-35
  done: false
  episode_len_mean: 458.71
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.77
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 810
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.1324348429838815
          entropy_coeff: 0.009999999999999998
          kl: 0.01438471199058532
          policy_loss: -0.003971436123053233
          total_loss: 0.19166828451885118
          vf_explained_var: 0.6712617874145508
          vf_loss: 0.19659538625015152
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  iterations_since_restore: 316
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,316,7532.47,316000,3.77,10,-5,458.71


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-08_17-15-53
  done: false
  episode_len_mean: 459.61
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.83
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 812
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.8305006477567884
          entropy_coeff: 0.009999999999999998
          kl: 0.014721027279443275
          policy_loss: -0.015557835830582513
          total_loss: 0.22376658601893318
          vf_explained_var: 0.6706510186195374
          vf_loss: 0.24701831887165707
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
  iterations_since_restore: 317
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,317,7550.22,317000,3.83,10,-5,459.61


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-08_17-16-11
  done: false
  episode_len_mean: 460.64
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.93
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 815
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 2.1301155514187284
          entropy_coeff: 0.009999999999999998
          kl: 0.003797432060768793
          policy_loss: -0.07839834002984894
          total_loss: -0.05972943405310313
          vf_explained_var: 0.34971973299980164
          vf_loss: 0.03723282189263652
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  iterations_since_restore: 318
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,318,7567.81,318000,3.93,10,-5,460.64


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-08_17-16-32
  done: false
  episode_len_mean: 459.73
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.91
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 817
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.9237023380067613
          entropy_coeff: 0.009999999999999998
          kl: 0.010808682455744586
          policy_loss: -0.11321612646182379
          total_loss: -0.02989012168513404
          vf_explained_var: -0.3418753445148468
          vf_loss: 0.09866750644416444
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  iterations_since_restore: 319
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,319,7588.72,319000,3.91,10,-5,459.73


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-08_17-16-50
  done: false
  episode_len_mean: 460.44
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.87
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 820
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.769646696249644
          entropy_coeff: 0.009999999999999998
          kl: 0.020141421046538474
          policy_loss: -0.10630584646844202
          total_loss: 0.12850174233317374
          vf_explained_var: 0.5104609727859497
          vf_loss: 0.2452449599901835
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 320
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,320,7607.66,320000,3.87,8,-5,460.44


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-08_17-17-08
  done: false
  episode_len_mean: 460.14
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.89
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 822
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9072558999061584
          entropy_coeff: 0.009999999999999998
          kl: 0.014284447453929486
          policy_loss: -0.03128850865695212
          total_loss: 0.1572323412530952
          vf_explained_var: 0.2971436679363251
          vf_loss: 0.19987109624263313
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  iterations_since_restore: 321
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,321,7625.55,321000,3.89,8,-5,460.14


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-08_17-17-27
  done: false
  episode_len_mean: 460.18
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.85
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 824
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.2390225225024754
          entropy_coeff: 0.009999999999999998
          kl: 0.011056177595968133
          policy_loss: 0.02108359556231234
          total_loss: 0.13057014817992846
          vf_explained_var: 0.6044613718986511
          vf_loss: 0.12589970127575928
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
  iterations_since_restore: 322
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,322,7644.44,322000,3.85,8,-5,460.18




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-08_17-18-04
  done: false
  episode_len_mean: 456.52
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.93
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 827
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.4626359681288401
          entropy_coeff: 0.009999999999999998
          kl: 0.011464443558533767
          policy_loss: 0.091991460778647
          total_loss: 0.16266903856562243
          vf_explained_var: 0.9324560165405273
          vf_loss: 0.07910614781495598
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000
  iterations_since_restore: 323
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,323,7681.6,323000,3.93,8,-5,456.52


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-08_17-18-28
  done: false
  episode_len_mean: 454.64
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.95
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 829
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.5530571871333652
          entropy_coeff: 0.009999999999999998
          kl: 0.009066894247017308
          policy_loss: 0.05534056706560982
          total_loss: 0.16229440238740708
          vf_explained_var: 0.7286220192909241
          vf_loss: 0.11758275929217538
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
  iterations_since_restore: 324
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,324,7704.7,324000,3.95,8,-5,454.64


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-08_17-18-46
  done: false
  episode_len_mean: 454.54
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.91
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 832
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.4676397773954604
          entropy_coeff: 0.009999999999999998
          kl: 0.010907545450707603
          policy_loss: -0.16828270703554155
          total_loss: 0.13972067659099896
          vf_explained_var: 0.7229114770889282
          vf_loss: 0.3167830565219952
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
  iterations_since_restore: 325
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,325,7723.48,325000,3.91,8,-5,454.54


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-08_17-19-02
  done: false
  episode_len_mean: 455.0
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.88
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 833
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.2962282293372684
          entropy_coeff: 0.009999999999999998
          kl: 0.008964367054569787
          policy_loss: -0.1162688150174088
          total_loss: 0.14011994954198598
          vf_explained_var: 0.6103463768959045
          vf_loss: 0.2645048204395506
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  iterations_since_restore: 326
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,326,7739.37,326000,3.88,8,-5,455


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-08_17-19-20
  done: false
  episode_len_mean: 457.1
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.94
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 836
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9319677551587422
          entropy_coeff: 0.009999999999999998
          kl: 0.00982125670963374
          policy_loss: -0.12582981627848414
          total_loss: -0.03866312777002653
          vf_explained_var: 0.5101766586303711
          vf_loss: 0.1011768984556612
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000
  iterations_since_restore: 327
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,327,7757.06,327000,3.94,8,-5,457.1


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-08_17-19-38
  done: false
  episode_len_mean: 459.07
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.93
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 838
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.868061782254113
          entropy_coeff: 0.009999999999999998
          kl: 0.013979060337675462
          policy_loss: 0.0129107094473309
          total_loss: 0.24477464778141844
          vf_explained_var: 0.6253398656845093
          vf_loss: 0.24298733952972623
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  iterations_since_restore: 328
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,328,7775.16,328000,3.93,8,-5,459.07


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-08_17-19-55
  done: false
  episode_len_mean: 460.81
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.92
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 840
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.1741900549994573
          entropy_coeff: 0.009999999999999998
          kl: 0.009602696093840122
          policy_loss: -0.12485654883914524
          total_loss: -0.09345808592107561
          vf_explained_var: 0.26115572452545166
          vf_loss: 0.04794905175610135
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
  iterations_since_restore: 329
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,329,7792.11,329000,3.92,8,-5,460.81


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-08_17-20-11
  done: false
  episode_len_mean: 462.01
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.81
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 842
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.1303625583648682
          entropy_coeff: 0.009999999999999998
          kl: 0.013947274553746385
          policy_loss: -0.042929955240752965
          total_loss: 0.17257234876354535
          vf_explained_var: 0.6511344313621521
          vf_loss: 0.21926590020043982
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  iterations_since_restore: 330
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,330,7807.62,330000,3.81,8,-5,462.01


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-08_17-20-28
  done: false
  episode_len_mean: 462.24
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.74
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 844
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.047568558322059
          entropy_coeff: 0.009999999999999998
          kl: 0.008647248619823245
          policy_loss: -0.060435087896055645
          total_loss: -0.028866303174032104
          vf_explained_var: 0.06295796483755112
          vf_loss: 0.047369681746284996
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000
  iterations_since_restore: 331
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,331,7824.88,331000,3.74,8,-5,462.24


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-08_17-20-44
  done: false
  episode_len_mean: 461.5
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.67
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 846
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.55557292898496
          entropy_coeff: 0.009999999999999998
          kl: 0.007743939964348259
          policy_loss: -0.037028031961785425
          total_loss: 0.09530268997574846
          vf_explained_var: 0.6236047744750977
          vf_loss: 0.14370000254776744
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
  iterations_since_restore: 332
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,332,7841.29,332000,3.67,8,-5,461.5


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-08_17-21-02
  done: false
  episode_len_mean: 461.63
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.65
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 848
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9016746785905627
          entropy_coeff: 0.009999999999999998
          kl: 0.02119843946731982
          policy_loss: -0.034287264280849036
          total_loss: 0.31400866951379514
          vf_explained_var: 0.6439001560211182
          vf_loss: 0.35585260002149477
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  iterations_since_restore: 333
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,333,7859.38,333000,3.65,8,-5,461.63


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-08_17-21-20
  done: false
  episode_len_mean: 460.48
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.62
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 850
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6891500572363536
          entropy_coeff: 0.009999999999999998
          kl: 0.011996708811249531
          policy_loss: 0.012036735895607206
          total_loss: 0.2584383759026726
          vf_explained_var: 0.4743422567844391
          vf_loss: 0.2535648343463739
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  iterations_since_restore: 334
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,334,7876.79,334000,3.62,8,-5,460.48


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-08_17-21-41
  done: false
  episode_len_mean: 457.9
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.6
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 853
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9207294278674656
          entropy_coeff: 0.009999999999999998
          kl: 0.008566784423731318
          policy_loss: -0.2060086061971055
          total_loss: -0.13189551482597986
          vf_explained_var: 0.7325615882873535
          vf_loss: 0.08637345053462518
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  iterations_since_restore: 335
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,335,7898.13,335000,3.6,8,-5,457.9


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-08_17-22-01
  done: false
  episode_len_mean: 455.61
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.58
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 855
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.554168364736769
          entropy_coeff: 0.009999999999999998
          kl: 0.011945115472573854
          policy_loss: -0.10824622636040052
          total_loss: 0.17022531545824474
          vf_explained_var: 0.6172411441802979
          vf_loss: 0.28432675848404565
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  iterations_since_restore: 336
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,336,7918.15,336000,3.58,8,-5,455.61




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-08_17-22-38
  done: false
  episode_len_mean: 453.97
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.58
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 857
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.3826473673184714
          entropy_coeff: 0.009999999999999998
          kl: 0.012570561186647526
          policy_loss: -0.02264343591199981
          total_loss: 0.2474446619550387
          vf_explained_var: 0.7148337364196777
          vf_loss: 0.2737209168573221
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  iterations_since_restore: 337
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,337,7955.13,337000,3.58,8,-5,453.97


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-08_17-22-58
  done: false
  episode_len_mean: 451.65
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.6
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 860
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9210068252351549
          entropy_coeff: 0.009999999999999998
          kl: 0.011309351563507125
          policy_loss: -0.08590994922237263
          total_loss: 0.13485439862642024
          vf_explained_var: 0.5943502187728882
          vf_loss: 0.23080349597666
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  iterations_since_restore: 338
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,338,7974.97,338000,3.6,8,-5,451.65


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-08_17-23-20
  done: false
  episode_len_mean: 451.91
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.62
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 862
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.7615779519081116
          entropy_coeff: 0.009999999999999998
          kl: 0.0119106597344002
          policy_loss: -0.1549626215464539
          total_loss: -0.08189410865306854
          vf_explained_var: 0.5900276303291321
          vf_loss: 0.08102576125206219
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  iterations_since_restore: 339
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,339,7996.37,339000,3.62,8,-5,451.91


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-08_17-23-39
  done: false
  episode_len_mean: 452.66
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.69
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 865
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 2.14492523405287
          entropy_coeff: 0.009999999999999998
          kl: 0.008062675829892967
          policy_loss: -0.11600251181258095
          total_loss: -0.07455838177767064
          vf_explained_var: 0.3532291650772095
          vf_loss: 0.056355240806523293
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 340
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,340,8015.47,340000,3.69,8,-5,452.66


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-08_17-23-57
  done: false
  episode_len_mean: 453.62
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.72
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 867
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 2.0523857209417553
          entropy_coeff: 0.009999999999999998
          kl: 0.010170966707744988
          policy_loss: 0.0748191687795851
          total_loss: 0.17324211013813814
          vf_explained_var: 0.12415122985839844
          vf_loss: 0.110699009274443
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
  iterations_since_restore: 341
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,341,8033.91,341000,3.72,8,-5,453.62


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-08_17-24-20
  done: false
  episode_len_mean: 452.74
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.68
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 869
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.8295127444797092
          entropy_coeff: 0.009999999999999998
          kl: 0.016071849677073008
          policy_loss: -0.02760607832007938
          total_loss: 0.36009878458248246
          vf_explained_var: 0.48933202028274536
          vf_loss: 0.39296709299087523
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  iterations_since_restore: 342
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,342,8057.08,342000,3.68,8,-5,452.74


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-08_17-24-47
  done: false
  episode_len_mean: 446.23
  episode_media: {}
  episode_reward_max: 8.0
  episode_reward_mean: 3.69
  episode_reward_min: -5.0
  episodes_this_iter: 4
  episodes_total: 873
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.695815176433987
          entropy_coeff: 0.009999999999999998
          kl: 0.018879391386598363
          policy_loss: 0.012258690264489915
          total_loss: 0.29541467212968403
          vf_explained_var: 0.9019767642021179
          vf_loss: 0.28480456132027837
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  iterations_since_restore: 343
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,343,8083.8,343000,3.69,8,-5,446.23




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-08_17-26-10
  done: false
  episode_len_mean: 438.57
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.84
  episode_reward_min: -5.0
  episodes_this_iter: 4
  episodes_total: 877
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9118536710739136
          entropy_coeff: 0.009999999999999998
          kl: 0.013131232822320626
          policy_loss: 0.042705236706468794
          total_loss: 0.20264157843258646
          vf_explained_var: 0.8950369358062744
          vf_loss: 0.16840656931615539
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  iterations_since_restore: 344
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,344,8166.22,344000,3.84,10,-5,438.57




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-08_17-26-59
  done: false
  episode_len_mean: 434.5
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.98
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 880
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.8479359083705478
          entropy_coeff: 0.009999999999999998
          kl: 0.007685027642990535
          policy_loss: -0.1192965974410375
          total_loss: 0.021991092753079203
          vf_explained_var: 0.6471982002258301
          vf_loss: 0.15353514932923848
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
  iterations_since_restore: 345
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,345,8215.17,345000,3.98,10,-5,434.5




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-08_17-27-57
  done: false
  episode_len_mean: 427.94
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.06
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 883
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.3428085247675579
          entropy_coeff: 0.009999999999999998
          kl: 0.008725124734505445
          policy_loss: -0.08435883190896776
          total_loss: 0.16545281459887823
          vf_explained_var: 0.7138420939445496
          vf_loss: 0.25616440052787465
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  iterations_since_restore: 346
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,346,8273.41,346000,4.06,10,-5,427.94


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-08_17-28-15
  done: false
  episode_len_mean: 426.67
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.07
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 885
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.7972911596298218
          entropy_coeff: 0.009999999999999998
          kl: 0.006755257361699327
          policy_loss: -0.17879992959400018
          total_loss: -0.08767904918640852
          vf_explained_var: 0.7548989057540894
          vf_loss: 0.10361585463914606
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  iterations_since_restore: 347
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,347,8291.32,347000,4.07,10,-5,426.67


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-08_17-28-32
  done: false
  episode_len_mean: 427.28
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.11
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 887
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5048319538434347
          entropy_coeff: 0.009999999999999998
          kl: 0.013626781605726477
          policy_loss: 0.01696529487768809
          total_loss: 0.35929884927140343
          vf_explained_var: 0.8596631288528442
          vf_loss: 0.3463317286223173
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  iterations_since_restore: 348
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,348,8308.19,348000,4.11,10,-5,427.28


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-08_17-28-51
  done: false
  episode_len_mean: 427.15
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.11
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 890
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5788703163464863
          entropy_coeff: 0.009999999999999998
          kl: 0.015468958720513855
          policy_loss: -0.030575277159611385
          total_loss: 0.36328605545891657
          vf_explained_var: 0.8444334864616394
          vf_loss: 0.39710603290134006
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  iterations_since_restore: 349
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,349,8327.6,349000,4.11,10,-5,427.15


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-08_17-29-12
  done: false
  episode_len_mean: 426.52
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.18
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 892
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 2.0034446239471437
          entropy_coeff: 0.009999999999999998
          kl: 0.011588518948018588
          policy_loss: -0.07664408572018147
          total_loss: 0.02781716858347257
          vf_explained_var: 0.6550095677375793
          vf_loss: 0.11509839893422193
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  iterations_since_restore: 350
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,350,8348.1,350000,4.18,10,-5,426.52


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-08_17-29-28
  done: false
  episode_len_mean: 425.95
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.22
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 894
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 0.954554471704695
          entropy_coeff: 0.009999999999999998
          kl: 0.010909862254822794
          policy_loss: 0.049268796377711824
          total_loss: 0.2100483213033941
          vf_explained_var: 0.7557373046875
          vf_loss: 0.1614780995580885
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  iterations_since_restore: 351
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,351,8364.69,351000,4.22,10,-5,425.95


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-08_17-29-49
  done: false
  episode_len_mean: 423.7
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.32
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 897
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.730429810947842
          entropy_coeff: 0.009999999999999998
          kl: 0.009475559078829734
          policy_loss: -0.08162351610759894
          total_loss: 0.08814454277356466
          vf_explained_var: 0.7621672749519348
          vf_loss: 0.17938848692509862
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  iterations_since_restore: 352
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,352,8385.42,352000,4.32,10,-2,423.7


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-08_17-30-09
  done: false
  episode_len_mean: 422.14
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.3
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 899
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9236743582619562
          entropy_coeff: 0.009999999999999998
          kl: 0.010895463909356924
          policy_loss: -0.03435766332679325
          total_loss: 0.08512851546208064
          vf_explained_var: 0.8617824912071228
          vf_loss: 0.12988762805859247
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  iterations_since_restore: 353
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,353,8405.6,353000,4.3,10,-2,422.14


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-08_17-30-29
  done: false
  episode_len_mean: 420.11
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.38
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 902
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5058815267350938
          entropy_coeff: 0.009999999999999998
          kl: 0.007397038047492228
          policy_loss: -0.12247354007429546
          total_loss: -0.02755670232905282
          vf_explained_var: 0.9551764726638794
          vf_loss: 0.10397728667077091
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  iterations_since_restore: 354
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,354,8425.95,354000,4.38,10,-2,420.11


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-08_17-30-46
  done: false
  episode_len_mean: 420.75
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.42
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 904
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6852982534302605
          entropy_coeff: 0.009999999999999998
          kl: 0.012536135053452154
          policy_loss: -0.010053643501467174
          total_loss: 0.20770572796463965
          vf_explained_var: 0.7153950333595276
          vf_loss: 0.22444661830862364
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
  iterations_since_restore: 355
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,355,8442.5,355000,4.42,10,-2,420.75


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-08_17-31-05
  done: false
  episode_len_mean: 421.53
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.45
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 906
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.3228692240185207
          entropy_coeff: 0.009999999999999998
          kl: 0.007232155730358593
          policy_loss: -0.14723942635787857
          total_loss: -0.015337789555390675
          vf_explained_var: 0.8293382525444031
          vf_loss: 0.13926566988229752
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
  iterations_since_restore: 356
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,356,8460.95,356000,4.45,10,-2,421.53


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-08_17-31-26
  done: false
  episode_len_mean: 419.03
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.29
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 909
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.3835640483432345
          entropy_coeff: 0.009999999999999998
          kl: 0.011978289172448697
          policy_loss: 0.10809783322943581
          total_loss: 0.3117563569711314
          vf_explained_var: 0.5812875628471375
          vf_loss: 0.20778079016341103
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  iterations_since_restore: 357
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,357,8482.48,357000,4.29,10,-7,419.03


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-08_17-31-45
  done: false
  episode_len_mean: 416.87
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.25
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 911
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.2666997571786245
          entropy_coeff: 0.009999999999999998
          kl: 0.016599814517717703
          policy_loss: -0.008590690460469988
          total_loss: 0.25317744618902605
          vf_explained_var: 0.511573851108551
          vf_loss: 0.2609740972932842
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  iterations_since_restore: 358
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,358,8501.74,358000,4.25,10,-7,416.87




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-08_17-32-26
  done: false
  episode_len_mean: 413.6
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.25
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 914
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 2.003218940893809
          entropy_coeff: 0.009999999999999998
          kl: 0.005569051783947209
          policy_loss: 0.03317138254642486
          total_loss: 0.04843176110751099
          vf_explained_var: 0.9653071165084839
          vf_loss: 0.030776542280283238
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
  iterations_since_restore: 359
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,359,8541.9,359000,4.25,10,-7,413.6


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-08_17-32-48
  done: false
  episode_len_mean: 411.69
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.28
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 916
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.654538971847958
          entropy_coeff: 0.009999999999999998
          kl: 0.008144822370488924
          policy_loss: -0.17672978714108467
          total_loss: -0.05409870110452175
          vf_explained_var: 0.7756829261779785
          vf_loss: 0.1325717187176148
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 360
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,360,8564.46,360000,4.28,10,-7,411.69


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-08_17-33-08
  done: false
  episode_len_mean: 413.84
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.24
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 919
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.7516070551342435
          entropy_coeff: 0.009999999999999998
          kl: 0.011887560597898666
          policy_loss: 0.012145275870958963
          total_loss: 0.5003868684586551
          vf_explained_var: 0.24835027754306793
          vf_loss: 0.49611786918507683
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  iterations_since_restore: 361
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,361,8584.71,361000,4.24,10,-7,413.84


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-08_17-33-32
  done: false
  episode_len_mean: 411.03
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.32
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 922
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.715621128347185
          entropy_coeff: 0.009999999999999998
          kl: 0.009330757353957608
          policy_loss: -0.020883556372589537
          total_loss: 0.173964666430321
          vf_explained_var: 0.5875048041343689
          vf_loss: 0.20443799131446413
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  iterations_since_restore: 362
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,362,8608.6,362000,4.32,10,-7,411.03


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-08_17-33-50
  done: false
  episode_len_mean: 411.59
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.37
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 923
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6924083352088928
          entropy_coeff: 0.009999999999999998
          kl: 0.006516915334932112
          policy_loss: 0.053104611072275376
          total_loss: 0.13218544949260022
          vf_explained_var: 0.7674354314804077
          vf_loss: 0.09072025937752591
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  iterations_since_restore: 363
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,363,8626.16,363000,4.37,10,-7,411.59


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-08_17-34-11
  done: false
  episode_len_mean: 413.64
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.34
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 926
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.7421093795034621
          entropy_coeff: 0.009999999999999998
          kl: 0.01446760652831287
          policy_loss: -0.04816474649641249
          total_loss: 0.17404458026091257
          vf_explained_var: 0.4739888906478882
          vf_loss: 0.22789842885815434
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  iterations_since_restore: 364
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,364,8647.39,364000,4.34,10,-7,413.64


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-08_17-34-29
  done: false
  episode_len_mean: 415.03
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.22
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 928
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6098508530192905
          entropy_coeff: 0.009999999999999998
          kl: 0.009848542265055452
          policy_loss: 0.011823818625675307
          total_loss: 0.22638341509219673
          vf_explained_var: 0.6338852643966675
          vf_loss: 0.22267177616142564
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  iterations_since_restore: 365
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,365,8664.99,365000,4.22,10,-7,415.03




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-08_17-35-34
  done: false
  episode_len_mean: 409.54
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.47
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 932
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6188291284773084
          entropy_coeff: 0.009999999999999998
          kl: 0.006176034525515206
          policy_loss: 0.13391905021336342
          total_loss: 0.15745431996054118
          vf_explained_var: 0.3653255105018616
          vf_loss: 0.03471532436605129
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  iterations_since_restore: 366
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,366,8730.25,366000,4.47,10,-7,409.54




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-08_17-36-40
  done: false
  episode_len_mean: 399.75
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.51
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 936
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.379351704650455
          entropy_coeff: 0.009999999999999998
          kl: 0.010298102375058033
          policy_loss: 0.062351032594839734
          total_loss: 0.27894492741260263
          vf_explained_var: 0.8677743673324585
          vf_loss: 0.22203652991188896
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
  iterations_since_restore: 367
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,367,8796.25,367000,4.51,10,-7,399.75


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-08_17-36-59
  done: false
  episode_len_mean: 399.77
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.54
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 938
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 2.0770042485660976
          entropy_coeff: 0.009999999999999998
          kl: 0.008666030119758123
          policy_loss: -0.01930840313434601
          total_loss: 0.07304027531709936
          vf_explained_var: 0.25478607416152954
          vf_loss: 0.10609130778660376
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  iterations_since_restore: 368
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,368,8815.58,368000,4.54,10,-7,399.77




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-08_17-38-18
  done: false
  episode_len_mean: 390.87
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.78
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 942
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.7500200496779548
          entropy_coeff: 0.009999999999999998
          kl: 0.007485080647776717
          policy_loss: -0.004459962621331215
          total_loss: 0.05974778139756785
          vf_explained_var: 0.7373389601707458
          vf_loss: 0.07563818173689975
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  iterations_since_restore: 369
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,369,8893.83,369000,4.78,10,-7,390.87


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-08_17-38-38
  done: false
  episode_len_mean: 390.1
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.85
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 944
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5710423641734652
          entropy_coeff: 0.009999999999999998
          kl: 0.012975762307613298
          policy_loss: -0.013992163497540686
          total_loss: 0.15493773031565877
          vf_explained_var: 0.5770056247711182
          vf_loss: 0.17411807597511345
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  iterations_since_restore: 370
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,370,8914.1,370000,4.85,10,-7,390.1


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-08_17-38-52
  done: false
  episode_len_mean: 390.99
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.88
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 946
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.26333523425791
          entropy_coeff: 0.009999999999999998
          kl: 0.004040184704739626
          policy_loss: -0.08035351874099837
          total_loss: -0.0252299175494247
          vf_explained_var: 0.3035734295845032
          vf_loss: 0.06448070977090134
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000
  iterations_since_restore: 371
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,371,8928.43,371000,4.88,10,-7,390.99




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-08_17-39-50
  done: false
  episode_len_mean: 385.45
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.02
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 949
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.8036207331551446
          entropy_coeff: 0.009999999999999998
          kl: 0.010198235660821817
          policy_loss: -0.04521011614965068
          total_loss: 0.06359517977883418
          vf_explained_var: 0.3054672181606293
          vf_loss: 0.1227065556579166
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  iterations_since_restore: 372
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,372,8985.66,372000,5.02,10,-7,385.45


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-08_17-40-09
  done: false
  episode_len_mean: 385.0
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.13
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 951
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.804124497042762
          entropy_coeff: 0.009999999999999998
          kl: 0.01133546591567275
          policy_loss: -0.025848662956721252
          total_loss: -0.01624097288068798
          vf_explained_var: 0.793643593788147
          vf_loss: 0.02305288752540946
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
  iterations_since_restore: 373
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,373,9004.71,373000,5.13,10,-7,385


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-08_17-40-26
  done: false
  episode_len_mean: 386.63
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.21
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 953
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.5512685391638015
          entropy_coeff: 0.009999999999999998
          kl: 0.015704520251174456
          policy_loss: -0.01873057766093148
          total_loss: 0.05616562621047099
          vf_explained_var: 0.8303840756416321
          vf_loss: 0.0840413776329822
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  iterations_since_restore: 374
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,374,9021.72,374000,5.21,10,-7,386.63




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-08_17-41-28
  done: false
  episode_len_mean: 384.16
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.46
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 956
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.4755149788326687
          entropy_coeff: 0.009999999999999998
          kl: 0.01597804960731981
          policy_loss: -0.10984786310129696
          total_loss: 0.032219856811894314
          vf_explained_var: 0.6209450364112854
          vf_loss: 0.15034445693923368
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  iterations_since_restore: 375
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,375,9083.9,375000,5.46,10,-7,384.16




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-08_17-42-26
  done: false
  episode_len_mean: 380.29
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.61
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 960
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.9282672312524585
          entropy_coeff: 0.009999999999999998
          kl: 0.011042164140623041
          policy_loss: -0.061518556997179986
          total_loss: -0.023311550832457013
          vf_explained_var: 0.32085084915161133
          vf_loss: 0.05301255044113431
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  iterations_since_restore: 376
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,376,9141.92,376000,5.61,10,-7,380.29


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-08_17-42-48
  done: false
  episode_len_mean: 381.38
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.65
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 962
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 2.1251539561483597
          entropy_coeff: 0.009999999999999998
          kl: 0.01675214265838567
          policy_loss: -0.15052046622667048
          total_loss: -0.09118927733765708
          vf_explained_var: 0.20505733788013458
          vf_loss: 0.07379044902821381
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
  iterations_since_restore: 377
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,377,9164.27,377000,5.65,10,-7,381.38




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-08_17-43-51
  done: false
  episode_len_mean: 374.22
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.68
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 966
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.8607893294758266
          entropy_coeff: 0.009999999999999998
          kl: 0.0236427940531639
          policy_loss: -0.052777079823944306
          total_loss: 0.3178852492322524
          vf_explained_var: 0.5482766628265381
          vf_loss: 0.3796840842399332
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  iterations_since_restore: 378
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,378,9226.52,378000,5.68,10,-7,374.22


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-08_17-44-07
  done: false
  episode_len_mean: 373.4
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.69
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 968
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.2308708906173704
          entropy_coeff: 0.009999999999999998
          kl: 0.005115655586964193
          policy_loss: -0.12184673075874647
          total_loss: -0.09549220734172398
          vf_explained_var: 0.5118066668510437
          vf_loss: 0.04555196393436442
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  iterations_since_restore: 379
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,379,9243.12,379000,5.69,10,-7,373.4


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-08_17-44-23
  done: false
  episode_len_mean: 377.36
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.74
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 970
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3346169690291088
          entropy_coeff: 0.009999999999999998
          kl: 0.012816326518006753
          policy_loss: -0.014209635059038798
          total_loss: 0.2661493538878858
          vf_explained_var: 0.6711324453353882
          vf_loss: 0.2859104483284884
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  iterations_since_restore: 380
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,380,9258.81,380000,5.74,10,-7,377.36


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-08_17-44-42
  done: false
  episode_len_mean: 379.31
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.81
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 972
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7184542377789815
          entropy_coeff: 0.009999999999999998
          kl: 0.01150601395077461
          policy_loss: -0.011930094949073261
          total_loss: 0.04261097022228771
          vf_explained_var: 0.8187891840934753
          vf_loss: 0.06472781172229183
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  iterations_since_restore: 381
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,381,9278.08,381000,5.81,10,-7,379.31




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-08_17-45-37
  done: false
  episode_len_mean: 379.72
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.85
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 976
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.841387512948778
          entropy_coeff: 0.009999999999999998
          kl: 0.011181314078458569
          policy_loss: -0.07092296249336666
          total_loss: 0.3068631436261866
          vf_explained_var: 0.8768668174743652
          vf_loss: 0.38939965690175693
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  iterations_since_restore: 382
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,382,9333.3,382000,5.85,10,-7,379.72


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-08_17-45-56
  done: false
  episode_len_mean: 381.35
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.84
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 978
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7956297755241395
          entropy_coeff: 0.009999999999999998
          kl: 0.013694528970318448
          policy_loss: 0.035999013690484895
          total_loss: 0.2241380325725509
          vf_explained_var: 0.8564919233322144
          vf_loss: 0.19776649399557047
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  iterations_since_restore: 383
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,383,9351.85,383000,5.84,10,-7,381.35


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-08_17-46-15
  done: false
  episode_len_mean: 385.87
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.78
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 981
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.9363287806510925
          entropy_coeff: 0.009999999999999998
          kl: 0.008926100298252729
          policy_loss: -0.08245632747809092
          total_loss: 0.03448993671271536
          vf_explained_var: 0.28134670853614807
          vf_loss: 0.13088082037866117
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
  iterations_since_restore: 384
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,384,9370.42,384000,5.78,10,-7,385.87


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-08_17-46-32
  done: false
  episode_len_mean: 392.12
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.8
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 983
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.9294763697518242
          entropy_coeff: 0.009999999999999998
          kl: 0.01458678755162036
          policy_loss: -0.058871866369413
          total_loss: 9.3714768687884e-05
          vf_explained_var: 0.8877391219139099
          vf_loss: 0.06938886580367883
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  iterations_since_restore: 385
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,385,9388.19,385000,5.8,10,-7,392.12




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-08_17-47-13
  done: false
  episode_len_mean: 384.91
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.94
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 987
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3526114573081334
          entropy_coeff: 0.009999999999999998
          kl: 0.01153227447652268
          policy_loss: 0.01688614326218764
          total_loss: 0.24745873742633395
          vf_explained_var: 0.7064476609230042
          vf_loss: 0.23708494388394885
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  iterations_since_restore: 386
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,386,9429.13,386000,5.94,10,-7,384.91


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-08_17-47-31
  done: false
  episode_len_mean: 384.7
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.98
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 989
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.094419929716322
          entropy_coeff: 0.009999999999999998
          kl: 0.007042172377487393
          policy_loss: 0.07979510679013199
          total_loss: 0.14705312078197796
          vf_explained_var: 0.7041260004043579
          vf_loss: 0.08391926432442334
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  iterations_since_restore: 387
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,387,9447.14,387000,5.98,10,-7,384.7


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-08_17-47-51
  done: false
  episode_len_mean: 384.52
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.01
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 991
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.9738041838010152
          entropy_coeff: 0.009999999999999998
          kl: 0.008812024535868594
          policy_loss: -0.14537332066231304
          total_loss: -0.050307838908500144
          vf_explained_var: 0.868851900100708
          vf_loss: 0.10944417297012277
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  iterations_since_restore: 388
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,388,9466.54,388000,6.01,10,-7,384.52




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-08_17-48-52
  done: false
  episode_len_mean: 379.47
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.08
  episode_reward_min: -7.0
  episodes_this_iter: 4
  episodes_total: 995
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.076567061742147
          entropy_coeff: 0.009999999999999998
          kl: 0.007336625075732023
          policy_loss: 0.10593364867899153
          total_loss: 0.15224777025481065
          vf_explained_var: 0.30936476588249207
          vf_loss: 0.06261776226262251
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  iterations_since_restore: 389
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,389,9527.75,389000,6.08,10,-7,379.47


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-08_17-49-16
  done: false
  episode_len_mean: 378.22
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.09
  episode_reward_min: -7.0
  episodes_this_iter: 2
  episodes_total: 997
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4193598369757334
          entropy_coeff: 0.009999999999999998
          kl: 0.009829077049640105
          policy_loss: 0.02928828563955095
          total_loss: 0.13950107180409962
          vf_explained_var: 0.7012165188789368
          vf_loss: 0.11842847673429382
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  iterations_since_restore: 390
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,390,9551.37,390000,6.09,10,-7,378.22


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-08_17-49-37
  done: false
  episode_len_mean: 378.37
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.17
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 1000
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.925078308582306
          entropy_coeff: 0.009999999999999998
          kl: 0.010765469457978977
          policy_loss: 0.02937929452293449
          total_loss: 0.07170626856386661
          vf_explained_var: 0.9645277261734009
          vf_loss: 0.0550303487210638
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  iterations_since_restore: 391
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,391,9572.98,391000,6.17,10,-7,378.37




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-08_17-50-17
  done: false
  episode_len_mean: 373.59
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.17
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 1003
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.963170666164822
          entropy_coeff: 0.009999999999999998
          kl: 0.012549588521156944
          policy_loss: -0.0987851944234636
          total_loss: 0.044344087193409605
          vf_explained_var: 0.9507886171340942
          vf_loss: 0.15512850810256268
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  iterations_since_restore: 392
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,392,9612.35,392000,6.17,10,-7,373.59




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-08_17-51-12
  done: false
  episode_len_mean: 368.46
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.42
  episode_reward_min: -1.0
  episodes_this_iter: 4
  episodes_total: 1007
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.0010391182369656
          entropy_coeff: 0.009999999999999998
          kl: 0.008673616574870839
          policy_loss: 0.03342527062114742
          total_loss: 0.041621829393423265
          vf_explained_var: 0.8318814039230347
          vf_loss: 0.02293177692530056
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  iterations_since_restore: 393
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,393,9668.03,393000,6.42,10,-1,368.46




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-08_17-52-21
  done: false
  episode_len_mean: 360.71
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.61
  episode_reward_min: -1.0
  episodes_this_iter: 4
  episodes_total: 1011
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.1916149112913343
          entropy_coeff: 0.009999999999999998
          kl: 0.0035805950532690842
          policy_loss: 0.007667905754513211
          total_loss: 0.020513161395986874
          vf_explained_var: 0.1413925439119339
          vf_loss: 0.03258373618074176
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  iterations_since_restore: 394
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,394,9736.72,394000,6.61,10,-1,360.71




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-08_17-53-08
  done: false
  episode_len_mean: 358.79
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.64
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1014
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9274248666233487
          entropy_coeff: 0.009999999999999998
          kl: 0.01188157897314519
          policy_loss: 0.009079036116600037
          total_loss: 0.11210259513722526
          vf_explained_var: 0.734556257724762
          vf_loss: 0.11868470204580162
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  iterations_since_restore: 395
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,395,9783.98,395000,6.64,10,-1,358.79


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-08_17-53-28
  done: false
  episode_len_mean: 359.6
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.62
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1017
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9866739339298671
          entropy_coeff: 0.009999999999999998
          kl: 0.020040418922737994
          policy_loss: -0.010643381708198124
          total_loss: 0.1730095576081011
          vf_explained_var: 0.3460218906402588
          vf_loss: 0.19742552960912388
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  iterations_since_restore: 396
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,396,9803.27,396000,6.62,10,-1,359.6


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-08_17-53-47
  done: false
  episode_len_mean: 359.09
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.67
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1019
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 2.124930187066396
          entropy_coeff: 0.009999999999999998
          kl: 0.009052377928280899
          policy_loss: 0.023309642614589798
          total_loss: 0.05122054869102107
          vf_explained_var: 0.3369148373603821
          vf_loss: 0.04503106120456424
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  iterations_since_restore: 397
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,397,9822.84,397000,6.67,10,-1,359.09


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-08_17-54-06
  done: false
  episode_len_mean: 360.43
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.7
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1021
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 2.107327604293823
          entropy_coeff: 0.009999999999999998
          kl: 0.010380805683660909
          policy_loss: -0.049075401822725934
          total_loss: -0.028209493392043643
          vf_explained_var: 0.8433452844619751
          vf_loss: 0.03720409087836742
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
  iterations_since_restore: 398
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,398,9841.65,398000,6.7,10,-1,360.43


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-08_17-54-26
  done: false
  episode_len_mean: 359.02
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.67
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1024
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 2.0528573089175755
          entropy_coeff: 0.009999999999999998
          kl: 0.008944171497360124
          policy_loss: 0.015588107208410898
          total_loss: 0.11307972214288181
          vf_explained_var: 0.9474902749061584
          vf_loss: 0.11394039430759019
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  iterations_since_restore: 399
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,399,9861.46,399000,6.67,10,-1,359.02


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-08_17-54-48
  done: false
  episode_len_mean: 356.79
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.74
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1026
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.7460898942417569
          entropy_coeff: 0.009999999999999998
          kl: 0.00966125930754921
          policy_loss: -0.0653795657058557
          total_loss: -0.037263470060295525
          vf_explained_var: 0.5016469359397888
          vf_loss: 0.041170112840417356
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  iterations_since_restore: 400
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,400,9883.53,400000,6.74,10,-1,356.79




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-08_17-55-46
  done: false
  episode_len_mean: 354.88
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.8
  episode_reward_min: 2.0
  episodes_this_iter: 4
  episodes_total: 1030
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 2.044999913374583
          entropy_coeff: 0.009999999999999998
          kl: 0.016827864841145828
          policy_loss: -0.003898665433128675
          total_loss: 0.40895575046953225
          vf_explained_var: 0.7155667543411255
          vf_loss: 0.4256285604917341
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  iterations_since_restore: 401
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,401,9941.68,401000,6.8,10,2,354.88


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-08_17-56-07
  done: false
  episode_len_mean: 358.76
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.77
  episode_reward_min: 2.0
  episodes_this_iter: 2
  episodes_total: 1032
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 2.1054836644066706
          entropy_coeff: 0.009999999999999998
          kl: 0.005064050266975843
          policy_loss: -0.17403813031398588
          total_loss: -0.14743856456544663
          vf_explained_var: 0.3105683922767639
          vf_loss: 0.04534448907555391
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  iterations_since_restore: 402
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,402,9962.88,402000,6.77,10,2,358.76




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-08_17-57-07
  done: false
  episode_len_mean: 356.61
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.69
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 1035
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.7228209680981106
          entropy_coeff: 0.009999999999999998
          kl: 0.019182970160141713
          policy_loss: -0.04831321231193013
          total_loss: 0.2701676624102725
          vf_explained_var: 0.569747269153595
          vf_loss: 0.32695897511310046
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iterations_since_restore: 403
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,403,10022.4,403000,6.69,10,-5,356.61


Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-08_17-57-24
  done: false
  episode_len_mean: 361.59
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.71
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 1038
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.951587892903222
          entropy_coeff: 0.009999999999999998
          kl: 0.01162919142881829
          policy_loss: -0.035001074771086375
          total_loss: -0.008892037636703915
          vf_explained_var: 0.8332535028457642
          vf_loss: 0.040320383488304086
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iterations_since_restore: 404
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,404,10039.9,404000,6.71,10,-5,361.59




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-08_17-58-31
  done: false
  episode_len_mean: 358.83
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.7
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 1041
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.6849386188719007
          entropy_coeff: 0.009999999999999998
          kl: 0.0110652988392656
          policy_loss: -0.09066795044475132
          total_loss: -0.0070169651259978615
          vf_explained_var: 0.8144615292549133
          vf_loss: 0.0954530489527517
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iterations_since_restore: 405
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,405,10106.6,405000,6.7,10,-5,358.83




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-08_18-00-27
  done: false
  episode_len_mean: 345.29
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.81
  episode_reward_min: -5.0
  episodes_this_iter: 7
  episodes_total: 1048
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.7165810704231261
          entropy_coeff: 0.009999999999999998
          kl: 0.017235039163420594
          policy_loss: 0.18550394765204853
          total_loss: 0.3280689977109432
          vf_explained_var: 0.8468323349952698
          vf_loss: 0.15186927875296938
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  iterations_since_restore: 406
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,406,10222.8,406000,6.81,10,-5,345.29




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-08_18-02-47
  done: false
  episode_len_mean: 321.74
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.92
  episode_reward_min: -5.0
  episodes_this_iter: 7
  episodes_total: 1055
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.4996302054988013
          entropy_coeff: 0.009999999999999998
          kl: 0.013181651949368383
          policy_loss: -0.1565324506825871
          total_loss: 0.08715519981665744
          vf_explained_var: 0.3126491606235504
          vf_loss: 0.25267127737816836
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  iterations_since_restore: 407
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,407,10362.8,407000,6.92,10,-5,321.74




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-08_18-05-48
  done: false
  episode_len_mean: 304.27
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.12
  episode_reward_min: -5.0
  episodes_this_iter: 10
  episodes_total: 1065
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.4842854075961642
          entropy_coeff: 0.009999999999999998
          kl: 0.01054035931984981
          policy_loss: -0.09714475588666069
          total_loss: 0.08588763781719738
          vf_explained_var: 0.3841882348060608
          vf_loss: 0.1930673650931567
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  iterations_since_restore: 408
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,408,10543.5,408000,7.12,10,-5,304.27




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-08_18-06-26
  done: false
  episode_len_mean: 300.12
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.15
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 1068
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.7715070565541586
          entropy_coeff: 0.009999999999999998
          kl: 0.015562889405258358
          policy_loss: 0.009558311270342932
          total_loss: 0.48839850339831575
          vf_explained_var: 0.3502582311630249
          vf_loss: 0.4894564071463214
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iterations_since_restore: 409
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,409,10580.8,409000,7.15,10,-5,300.12




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-08_18-07-31
  done: false
  episode_len_mean: 290.16
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.18
  episode_reward_min: -5.0
  episodes_this_iter: 4
  episodes_total: 1072
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.5084158658981324
          entropy_coeff: 0.009999999999999998
          kl: 0.012258325050510482
          policy_loss: -0.12783834147784445
          total_loss: 0.015682006627321242
          vf_explained_var: 0.9588323831558228
          vf_loss: 0.15301300171348783
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  iterations_since_restore: 410
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,410,10646.2,410000,7.18,10,-5,290.16




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-08_18-09-54
  done: false
  episode_len_mean: 270.4
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.37
  episode_reward_min: -5.0
  episodes_this_iter: 9
  episodes_total: 1081
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.6374191019270155
          entropy_coeff: 0.009999999999999998
          kl: 0.013076143478521414
          policy_loss: -0.1034759828613864
          total_loss: 0.03883657331267993
          vf_explained_var: 0.9796387553215027
          vf_loss: 0.1527222015377548
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  iterations_since_restore: 411
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,411,10789.7,411000,7.37,10,-5,270.4




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-08_18-14-24
  done: false
  episode_len_mean: 227.74
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.72
  episode_reward_min: -5.0
  episodes_this_iter: 15
  episodes_total: 1096
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 0.97809320009417
          entropy_coeff: 0.009999999999999998
          kl: 0.01603357505646596
          policy_loss: -0.042377182344595594
          total_loss: 0.4077900965594583
          vf_explained_var: 0.9452282190322876
          vf_loss: 0.45263466007179687
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  iterations_since_restore: 412
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,412,11059.2,412000,7.72,10,-5,227.74




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-08_18-16-37
  done: false
  episode_len_mean: 208.67
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.94
  episode_reward_min: -5.0
  episodes_this_iter: 8
  episodes_total: 1104
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.6270766615867616
          entropy_coeff: 0.009999999999999998
          kl: 0.012051023475252625
          policy_loss: -0.0072445067266623175
          total_loss: 0.12271466520097521
          vf_explained_var: 0.8321728110313416
          vf_loss: 0.14073299144705137
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  iterations_since_restore: 413
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,413,11192.2,413000,7.94,10,-5,208.67




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-08_18-19-15
  done: false
  episode_len_mean: 196.36
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.02
  episode_reward_min: -5.0
  episodes_this_iter: 8
  episodes_total: 1112
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.7260015938017104
          entropy_coeff: 0.009999999999999998
          kl: 0.013951566935012849
          policy_loss: -0.014896022031704585
          total_loss: 0.15067578090561762
          vf_explained_var: 0.673677384853363
          vf_loss: 0.17646796007951102
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  iterations_since_restore: 414
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,414,11350.5,414000,8.02,10,-5,196.36




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-08_18-21-30
  done: false
  episode_len_mean: 178.22
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.3
  episode_reward_min: -5.0
  episodes_this_iter: 7
  episodes_total: 1119
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.6054788178867765
          entropy_coeff: 0.009999999999999998
          kl: 0.014003478227576052
          policy_loss: -0.09974916825691858
          total_loss: 0.24106390265127023
          vf_explained_var: 0.838350236415863
          vf_loss: 0.3504803203874164
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
  iterations_since_restore: 415
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,415,11485.6,415000,8.3,10,-5,178.22




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-08_18-24-37
  done: false
  episode_len_mean: 146.01
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.51
  episode_reward_min: -5.0
  episodes_this_iter: 12
  episodes_total: 1131
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.5190176321400537
          entropy_coeff: 0.009999999999999998
          kl: 0.010091227193148929
          policy_loss: -0.0010486279096868303
          total_loss: 0.19333688418070474
          vf_explained_var: 0.8887879252433777
          vf_loss: 0.20497267916798592
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  iterations_since_restore: 416
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,416,11672.5,416000,8.51,10,-5,146.01




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-08_18-25-38
  done: false
  episode_len_mean: 143.48
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.66
  episode_reward_min: -2.0
  episodes_this_iter: 4
  episodes_total: 1135
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.7393817252582975
          entropy_coeff: 0.009999999999999998
          kl: 0.01635850087743313
          policy_loss: -0.04871884356770251
          total_loss: 0.10461008274513814
          vf_explained_var: 0.6193366050720215
          vf_loss: 0.1632609889532129
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
  iterations_since_restore: 417
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,417,11733.3,417000,8.66,10,-2,143.48




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-08_18-28-28
  done: false
  episode_len_mean: 121.62
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.91
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 1144
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.312792248196072
          entropy_coeff: 0.009999999999999998
          kl: 0.007812925585737516
          policy_loss: -0.09402027461263869
          total_loss: 0.06197232630931669
          vf_explained_var: 0.9660365581512451
          vf_loss: 0.16555673778057098
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
  iterations_since_restore: 418
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,418,11903.3,418000,8.91,10,-2,121.62




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-08_18-30-26
  done: false
  episode_len_mean: 127.5
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.78
  episode_reward_min: -2.0
  episodes_this_iter: 8
  episodes_total: 1152
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.42722622818417
          entropy_coeff: 0.009999999999999998
          kl: 0.02201398032508241
          policy_loss: -0.19156934916973115
          total_loss: 0.24219994693994523
          vf_explained_var: 0.6687705516815186
          vf_loss: 0.4380001133307815
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  iterations_since_restore: 419
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,419,12021.3,419000,8.78,10,-2,127.5




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-08_18-31-24
  done: false
  episode_len_mean: 128.13
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.61
  episode_reward_min: -3.0
  episodes_this_iter: 4
  episodes_total: 1156
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.5738774140675862
          entropy_coeff: 0.009999999999999998
          kl: 0.020080204269755954
          policy_loss: -0.07753506956828965
          total_loss: 0.346898462706142
          vf_explained_var: 0.8598333597183228
          vf_loss: 0.42643324923184184
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
  iterations_since_restore: 420
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,420,12079.4,420000,8.61,10,-3,128.13




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-08_18-42-13
  done: false
  episode_len_mean: 94.38
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.97
  episode_reward_min: -3.0
  episodes_this_iter: 35
  episodes_total: 1191
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 0.6239207181665632
          entropy_coeff: 0.009999999999999998
          kl: 0.014148846537667618
          policy_loss: -0.007912888957394493
          total_loss: 0.20356609635055065
          vf_explained_var: 0.9798527956008911
          vf_loss: 0.20319703850481247
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
  iterations_since_restore: 421
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,421,12728.2,421000,8.97,10,-3,94.38




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-08_18-46-01
  done: false
  episode_len_mean: 90.11
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.96
  episode_reward_min: -3.0
  episodes_this_iter: 13
  episodes_total: 1204
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.2158998754289416
          entropy_coeff: 0.009999999999999998
          kl: 0.007909021425826722
          policy_loss: -0.1567625596291489
          total_loss: 0.12962437296907106
          vf_explained_var: 0.6933659911155701
          vf_loss: 0.2904287954999341
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
  iterations_since_restore: 422
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,422,12955.8,422000,8.96,10,-3,90.11




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-08_18-50-44
  done: false
  episode_len_mean: 83.76
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.03
  episode_reward_min: -3.0
  episodes_this_iter: 15
  episodes_total: 1219
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.0485549403561487
          entropy_coeff: 0.009999999999999998
          kl: 0.008548879522897027
          policy_loss: 0.02304066920446025
          total_loss: 0.24215527052680652
          vf_explained_var: 0.9754531979560852
          vf_loss: 0.22082631869448555
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  iterations_since_restore: 423
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,423,13239,423000,9.03,10,-3,83.76




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-08_18-54-32
  done: false
  episode_len_mean: 76.13
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.19
  episode_reward_min: -3.0
  episodes_this_iter: 13
  episodes_total: 1232
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.2939164340496063
          entropy_coeff: 0.009999999999999998
          kl: 0.007599162316884249
          policy_loss: -0.15076049491763116
          total_loss: 0.13930291291843686
          vf_explained_var: 0.7931714653968811
          vf_loss: 0.29520344419611827
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  iterations_since_restore: 424
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,424,13466.8,424000,9.19,10,-3,76.13




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-08_19-03-51
  done: false
  episode_len_mean: 47.42
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.64
  episode_reward_min: 2.0
  episodes_this_iter: 30
  episodes_total: 1262
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 0.9022077613406712
          entropy_coeff: 0.009999999999999998
          kl: 0.007942282342230437
          policy_loss: 0.03286443005005519
          total_loss: 0.2740702026420169
          vf_explained_var: 0.9712600111961365
          vf_loss: 0.24207658047477404
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  iterations_since_restore: 425
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,425,14026,425000,9.64,10,2,47.42




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-08_19-16-19
  done: false
  episode_len_mean: 44.28
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.7
  episode_reward_min: 2.0
  episodes_this_iter: 39
  episodes_total: 1301
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 0.5839931428432464
          entropy_coeff: 0.009999999999999998
          kl: 0.002569724062694645
          policy_loss: -0.2571816358301375
          total_loss: -0.21538022889031305
          vf_explained_var: 0.9955976009368896
          vf_loss: 0.04500399546490775
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000
  iterations_since_restore: 426
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,426,14773.6,426000,9.7,10,2,44.28




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-08_19-17-58
  done: false
  episode_len_mean: 42.63
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.65
  episode_reward_min: 2.0
  episodes_this_iter: 6
  episodes_total: 1307
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.3765932771894667
          entropy_coeff: 0.009999999999999998
          kl: 0.012683890716551164
          policy_loss: -0.05795346779955758
          total_loss: 0.13411048222333194
          vf_explained_var: 0.8756049275398254
          vf_loss: 0.19932105967568026
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
  iterations_since_restore: 427
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,427,14872.5,427000,9.65,10,2,42.63




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-08_19-23-55
  done: false
  episode_len_mean: 46.22
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.53
  episode_reward_min: 2.0
  episodes_this_iter: 20
  episodes_total: 1327
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 0.9795435276296404
          entropy_coeff: 0.009999999999999998
          kl: 0.007705521945488848
          policy_loss: -0.17828210530181726
          total_loss: 0.022443459224369793
          vf_explained_var: 0.9093360304832458
          vf_loss: 0.206566861561603
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  iterations_since_restore: 428
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,428,15230.3,428000,9.53,10,2,46.22




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-08_19-28-54
  done: false
  episode_len_mean: 43.4
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.6
  episode_reward_min: 2.0
  episodes_this_iter: 17
  episodes_total: 1344
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.4936184194352893
          entropy_coeff: 0.009999999999999998
          kl: 0.012299113893410605
          policy_loss: -0.03510694209900167
          total_loss: 0.16483328027857674
          vf_explained_var: 0.6049861907958984
          vf_loss: 0.20856502912938596
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
  iterations_since_restore: 429
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,429,15528.9,429000,9.6,10,2,43.4




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-08_19-34-11
  done: false
  episode_len_mean: 50.14
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.47
  episode_reward_min: 0.0
  episodes_this_iter: 17
  episodes_total: 1361
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.2077673256397248
          entropy_coeff: 0.009999999999999998
          kl: 0.016088210803084695
          policy_loss: 0.1350923948817783
          total_loss: 0.42163475503524145
          vf_explained_var: 0.8322502970695496
          vf_loss: 0.2903642610543304
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000
  iterations_since_restore: 430
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,430,15845.5,430000,9.47,10,0,50.14




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-08_19-39-11
  done: false
  episode_len_mean: 54.94
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.45
  episode_reward_min: 0.0
  episodes_this_iter: 16
  episodes_total: 1377
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.338613282971912
          entropy_coeff: 0.009999999999999998
          kl: 0.008920884400228759
          policy_loss: -0.06542806459797754
          total_loss: 0.0031263210707240634
          vf_explained_var: 0.5497366189956665
          vf_loss: 0.07736270535323354
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000
  iterations_since_restore: 431
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,431,16146,431000,9.45,10,0,54.94




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-08_19-44-14
  done: false
  episode_len_mean: 62.07
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.27
  episode_reward_min: 0.0
  episodes_this_iter: 17
  episodes_total: 1394
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.064276385307312
          entropy_coeff: 0.009999999999999998
          kl: 0.01952054651418421
          policy_loss: -0.04418809053798516
          total_loss: 0.3164227633840508
          vf_explained_var: 0.9114653468132019
          vf_loss: 0.3612365235057142
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
  iterations_since_restore: 432
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,432,16448.3,432000,9.27,10,0,62.07




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-08_19-48-37
  done: false
  episode_len_mean: 58.82
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.3
  episode_reward_min: -4.0
  episodes_this_iter: 15
  episodes_total: 1409
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.0634526332219443
          entropy_coeff: 0.009999999999999998
          kl: 0.012966667370698499
          policy_loss: -0.11170579906966951
          total_loss: 0.16364508424368168
          vf_explained_var: 0.6871446967124939
          vf_loss: 0.2793314728471968
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000
  iterations_since_restore: 433
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,433,16711.5,433000,9.3,10,-4,58.82




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-08_19-51-26
  done: false
  episode_len_mean: 67.23
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.13
  episode_reward_min: -4.0
  episodes_this_iter: 10
  episodes_total: 1419
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.623931849002838
          entropy_coeff: 0.009999999999999998
          kl: 0.01984209667038886
          policy_loss: 0.06369750963317024
          total_loss: 0.33667141223947206
          vf_explained_var: 0.6073736548423767
          vf_loss: 0.27903111336959735
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  iterations_since_restore: 434
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,434,16881.1,434000,9.13,10,-4,67.23




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-08_19-57-31
  done: false
  episode_len_mean: 59.21
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.23
  episode_reward_min: -4.0
  episodes_this_iter: 19
  episodes_total: 1438
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 0.9897212074862586
          entropy_coeff: 0.009999999999999998
          kl: 0.013874697762891191
          policy_loss: -0.207474684715271
          total_loss: 0.33190356170137725
          vf_explained_var: 0.9211868047714233
          vf_loss: 0.5421555642452505
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
  iterations_since_restore: 435
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,435,17245.3,435000,9.23,10,-4,59.21




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-08_20-01-39
  done: false
  episode_len_mean: 64.57
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.09
  episode_reward_min: -4.0
  episodes_this_iter: 15
  episodes_total: 1453
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.0730046742492252
          entropy_coeff: 0.009999999999999998
          kl: 0.0124027967771422
          policy_loss: 0.06726069707009527
          total_loss: 0.5750918790284131
          vf_explained_var: 0.940179705619812
          vf_loss: 0.512196656399303
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  iterations_since_restore: 436
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,436,17493.6,436000,9.09,10,-4,64.57




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-08_20-10-42
  done: false
  episode_len_mean: 55.97
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.18
  episode_reward_min: -4.0
  episodes_this_iter: 29
  episodes_total: 1482
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 0.7820501377185186
          entropy_coeff: 0.009999999999999998
          kl: 0.008117704721526191
          policy_loss: 0.012277906388044357
          total_loss: 0.18182885779274835
          vf_explained_var: 0.5608119368553162
          vf_loss: 0.1732057976639933
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  iterations_since_restore: 437
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,437,18036.7,437000,9.18,10,-4,55.97




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-08_20-18-49
  done: false
  episode_len_mean: 50.76
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.41
  episode_reward_min: -3.0
  episodes_this_iter: 26
  episodes_total: 1508
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.0839086754454508
          entropy_coeff: 0.009999999999999998
          kl: 0.016315550959625055
          policy_loss: -0.03561833699544271
          total_loss: 0.3592434090148244
          vf_explained_var: 0.5805529952049255
          vf_loss: 0.3973283891048696
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000
  iterations_since_restore: 438
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,438,18524.2,438000,9.41,10,-3,50.76




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-08_20-27-33
  done: false
  episode_len_mean: 43.0
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.57
  episode_reward_min: -3.0
  episodes_this_iter: 28
  episodes_total: 1536
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.0949523667494456
          entropy_coeff: 0.009999999999999998
          kl: 0.01588294696402204
          policy_loss: 0.024060288319985072
          total_loss: 0.2811321049928665
          vf_explained_var: 0.7413131594657898
          vf_loss: 0.2598708973990546
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  iterations_since_restore: 439
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,439,19047.8,439000,9.57,10,-3,43




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-08_20-31-10
  done: false
  episode_len_mean: 43.73
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.54
  episode_reward_min: -1.0
  episodes_this_iter: 13
  episodes_total: 1549
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 0.8330875822239452
          entropy_coeff: 0.009999999999999998
          kl: 0.008763198590859705
          policy_loss: -0.05022023994889524
          total_loss: 0.33526170137855743
          vf_explained_var: 0.9590905904769897
          vf_loss: 0.38931591196192633
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  iterations_since_restore: 440
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,440,19265.1,440000,9.54,10,-1,43.73




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-08_20-34-02
  done: false
  episode_len_mean: 43.94
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.5
  episode_reward_min: -1.0
  episodes_this_iter: 9
  episodes_total: 1558
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.2346592638227674
          entropy_coeff: 0.009999999999999998
          kl: 0.018221831455976716
          policy_loss: -0.054160619030396144
          total_loss: 0.42375589782993
          vf_explained_var: 0.4349954426288605
          vf_loss: 0.48091244250535964
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  iterations_since_restore: 441
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,441,19436.4,441000,9.5,10,-1,43.94




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-08_20-44-40
  done: false
  episode_len_mean: 41.21
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.54
  episode_reward_min: -1.0
  episodes_this_iter: 34
  episodes_total: 1592
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 0.4782712426450517
          entropy_coeff: 0.009999999999999998
          kl: 0.0032250333323269862
          policy_loss: -0.17364733583397335
          total_loss: 0.035012715227074095
          vf_explained_var: 0.9825040102005005
          vf_loss: 0.21178781458487114
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
  iterations_since_restore: 442
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,442,20074.9,442000,9.54,10,-1,41.21




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-08_20-55-50
  done: false
  episode_len_mean: 41.55
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.59
  episode_reward_min: -1.0
  episodes_this_iter: 36
  episodes_total: 1628
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 0.7005860752529568
          entropy_coeff: 0.009999999999999998
          kl: 0.013543921584192967
          policy_loss: -0.05791283133957121
          total_loss: 0.02394189462065697
          vf_explained_var: 0.9935194849967957
          vf_loss: 0.08538550262649854
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  iterations_since_restore: 443
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,443,20744.8,443000,9.59,10,-1,41.55




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-08_21-11-43
  done: false
  episode_len_mean: 25.6
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.95
  episode_reward_min: 5.0
  episodes_this_iter: 50
  episodes_total: 1678
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 0.40432808101177214
          entropy_coeff: 0.009999999999999998
          kl: 0.0072814849158108035
          policy_loss: -0.05465913189368116
          total_loss: -0.03705448324067725
          vf_explained_var: 0.997933030128479
          vf_loss: 0.019779654861324363
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  iterations_since_restore: 444
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,444,21697.2,444000,9.95,10,5,25.6




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-08_21-18-58
  done: false
  episode_len_mean: 24.9
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.95
  episode_reward_min: 5.0
  episodes_this_iter: 23
  episodes_total: 1701
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 0.9529898948139615
          entropy_coeff: 0.009999999999999998
          kl: 0.012488734223232158
          policy_loss: -0.04593011596136623
          total_loss: 0.18107065856456755
          vf_explained_var: 0.8822134137153625
          vf_loss: 0.2333263285872009
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
  iterations_since_restore: 445
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,445,22132.5,445000,9.95,10,5,24.9




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-08_21-22-07
  done: false
  episode_len_mean: 33.5
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.87
  episode_reward_min: 5.0
  episodes_this_iter: 11
  episodes_total: 1712
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 1.261567813820309
          entropy_coeff: 0.009999999999999998
          kl: 0.01938983019369432
          policy_loss: -0.057127623053060635
          total_loss: 0.17456127719746695
          vf_explained_var: 0.878809928894043
          vf_loss: 0.23932956515087023
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  iterations_since_restore: 446
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,446,22321.1,446000,9.87,10,5,33.5




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-08_21-32-06
  done: false
  episode_len_mean: 36.82
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.77
  episode_reward_min: 0.0
  episodes_this_iter: 32
  episodes_total: 1744
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 0.8187974459595151
          entropy_coeff: 0.009999999999999998
          kl: 0.019272237508322996
          policy_loss: -0.03200320394502746
          total_loss: 0.419122522086319
          vf_explained_var: 0.9256473779678345
          vf_loss: 0.454368868470192
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  iterations_since_restore: 447
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,447,22920.3,447000,9.77,10,0,36.82




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-08_21-45-12
  done: false
  episode_len_mean: 37.44
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.77
  episode_reward_min: 0.0
  episodes_this_iter: 41
  episodes_total: 1785
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 0.501581965221299
          entropy_coeff: 0.009999999999999998
          kl: 0.003353590960274487
          policy_loss: -0.1451002188026905
          total_loss: -0.12742593155966864
          vf_explained_var: 0.998026967048645
          vf_loss: 0.021829647053447036
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  iterations_since_restore: 448
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,448,23706.2,448000,9.77,10,0,37.44




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-08_21-48-04
  done: false
  episode_len_mean: 42.39
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.65
  episode_reward_min: -1.0
  episodes_this_iter: 10
  episodes_total: 1795
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 1.4282671637005275
          entropy_coeff: 0.009999999999999998
          kl: 0.02492389443724152
          policy_loss: -0.046922378490368524
          total_loss: 0.39581886637541985
          vf_explained_var: 0.7037784457206726
          vf_loss: 0.45382645212941697
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  iterations_since_restore: 449
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,449,23878.2,449000,9.65,10,-1,42.39




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-08_21-55-38
  done: false
  episode_len_mean: 35.3
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.75
  episode_reward_min: -1.0
  episodes_this_iter: 24
  episodes_total: 1819
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 0.9536621077193155
          entropy_coeff: 0.009999999999999998
          kl: 0.016824275512770093
          policy_loss: 0.017854134655661054
          total_loss: 0.26898889595435727
          vf_explained_var: 0.8184055089950562
          vf_loss: 0.2574338257312775
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  iterations_since_restore: 450
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,450,24332.9,450000,9.75,10,-1,35.3




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-08_22-09-19
  done: false
  episode_len_mean: 36.22
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.74
  episode_reward_min: -1.0
  episodes_this_iter: 43
  episodes_total: 1862
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 0.5290845271613863
          entropy_coeff: 0.009999999999999998
          kl: 0.00878110389454596
          policy_loss: -0.028665608747137916
          total_loss: 0.21068333180414306
          vf_explained_var: 0.9763010740280151
          vf_loss: 0.24295000301467048
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000
  iterations_since_restore: 451
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,451,25153.3,451000,9.74,10,-1,36.22




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-08_22-14-23
  done: false
  episode_len_mean: 40.24
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.66
  episode_reward_min: -1.0
  episodes_this_iter: 16
  episodes_total: 1878
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 0.9384575873613358
          entropy_coeff: 0.009999999999999998
          kl: 0.01162034722699106
          policy_loss: -0.05990577878223525
          total_loss: 0.456799240079191
          vf_explained_var: 0.913115918636322
          vf_loss: 0.5238534470399221
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  iterations_since_restore: 452
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,452,25457.3,452000,9.66,10,-1,40.24




Result for PPO_my_env_c4eb6_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-08_22-19-03
  done: false
  episode_len_mean: 35.58
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.74
  episode_reward_min: 2.0
  episodes_this_iter: 15
  episodes_total: 1893
  experiment_id: 85625a43911a48d5a508c61fb7ca3640
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 0.8339942104286617
          entropy_coeff: 0.009999999999999998
          kl: 0.01307731742886163
          policy_loss: -0.1722606495850616
          total_loss: -0.003991273128324085
          vf_explained_var: 0.6649593710899353
          vf_loss: 0.17409280232257313
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000
  iterations_since_restore: 453
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c4eb6_00000,RUNNING,192.168.3.5:213,453,25737.5,453000,9.74,10,2,35.58


