In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional feature extractor for batches of 3-channel images.

    The network is five unpadded ``Conv2d`` + ``ReLU`` stages followed by a
    ``Flatten``. The final convolution emits 512 channels, so the flattened
    width is ``512 * H_out * W_out``; for a 64x64 input the spatial size
    shrinks to 1x1 and each sample yields exactly 512 features.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels, kernel_size, stride) per conv stage.
        conv_specs = (
            (3, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
            (64, 64, 3, 1),
            (64, 512, 2, 1),
        )

        stages = []
        for in_ch, out_ch, kernel, stride in conv_specs:
            stages.append(
                nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride, padding=0)
            )
            stages.append(nn.ReLU())
        stages.append(nn.Flatten())

        # The attribute name and layer order determine the state-dict keys
        # ("cnn.0.weight", "cnn.2.weight", ...), so both are kept fixed.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the conv stack to ``x`` and return the flattened activations."""
        flat_features = self.cnn(x)
        return flat_features

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: a pretrained ``VisualEncoder`` with linear policy and value heads.

    The encoder weights are loaded from a checkpoint at construction time.
    ``forward`` returns action logits of width ``action_space.n`` (the
    ``num_outputs`` argument is only forwarded to ``TorchModelV2``) and caches
    the value estimate that ``value_function`` later returns.

    Args:
        obs_space: Observation space, forwarded to ``TorchModelV2``.
        action_space: Action space; must expose ``n`` (number of actions).
        num_outputs: Forwarded to ``TorchModelV2``; not used to size the heads.
        model_config: RLlib model config dict, forwarded to ``TorchModelV2``.
        name: Model name, forwarded to ``TorchModelV2``.
        encoder_weights_path: Optional path to the encoder state-dict file.
            Defaults to ``DEFAULT_ENCODER_WEIGHTS``. Can be supplied through
            the ``custom_model_config`` entry of the trainer's model config,
            whose items are passed to this constructor as keyword arguments.
    """

    # Checkpoint loaded when no explicit path is given.
    # NOTE(review): the "weigths" spelling presumably matches the file on disk —
    # confirm before renaming.
    DEFAULT_ENCODER_WEIGHTS = "/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth"

    def __init__(self, obs_space, action_space, num_outputs, model_config, name,
                 encoder_weights_path=None):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512  # flattened width produced by VisualEncoder for the expected input size

        if encoder_weights_path is None:
            encoder_weights_path = self.DEFAULT_ENCODER_WEIGHTS

        self.encoder = VisualEncoder()
        # Load onto CPU first so the checkpoint opens on machines without a GPU;
        # the modules are moved to CUDA below when it is available.
        # NOTE: torch.load unpickles the file — only point this at trusted checkpoints.
        self.encoder.load_state_dict(
            torch.load(encoder_weights_path, map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        self.last_value = None  # set by forward(), read by value_function()

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits for ``input_dict['obs']`` and cache the value estimate.

        Args:
            input_dict: Mapping with an ``'obs'`` tensor of rank 4.
                # assumes layout (batch, height, width, channels) with 0-255
                # pixel values — TODO confirm against the env wrappers
            state: Passed through unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(logits, state)``.
        """
        # Tensor.cuda()/.to() return a new tensor instead of moving in place, so the
        # result must be rebound. Follow the device the heads actually live on rather
        # than a flag captured at construction time.
        device = self.action_head.weight.device
        obs = input_dict['obs'].to(device)
        # Move the last axis to position 1 (what Conv2d expects) and scale by 1/255.
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        features = self.encoder(obs)
        logits = self.action_head(features)
        # (batch, 1) -> (batch,)
        self.last_value = self.value_head(features).squeeze(1)
        return logits, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimates cached by the most recent ``forward`` call.

        Raises:
            AssertionError: If ``forward`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Make MyModelClass available to RLlib under the name used by the
# "custom_model" entry of the trainer's model config.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os

# Select the GPU through CUDA's environment variables: order devices by PCI bus
# id and expose only device index 0.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "0",
})

def env_creator(env_config):
    """Build the wrapped 'IGLUSilentBuilder-v0' environment used for training.

    Creates the gym environment with ``max_steps=1000``, sets its task set to
    the ``'C32'`` preset, then applies ``PovOnlyWrapper``, ``SelectAndPlace``
    and ``Discretization`` (with the ``'human-level'`` flat action space), in
    that order.

    Args:
        env_config: Environment config supplied by the caller; not used here.

    Returns:
        The fully wrapped environment.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=1000)
    base_env.update_taskset(TaskSet(preset=['C32']))

    # Innermost wrapper is applied first; the action table is built just
    # before the outermost Discretization wrapper is constructed.
    return Discretization(
        SelectAndPlace(PovOnlyWrapper(base_env)),
        flat_action_space('human-level'),
    )

# Register the environment factory under the name referenced by the "env" key
# of the trainer config.
from ray.tune.registry import register_env
register_env("my_env", env_creator)

# Tune entry point and the PPO trainer class used by the training cell.
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [6]:
from ray.tune.integration.wandb import WandbLogger

# Model section of the trainer config.
model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# Weights & Biases project and run name, read by WandbLogger from "logger_config".
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO C32 pretrained (AnnaCNN) (3 noops after placement)",
}

# PPO settings: registered env, torch backend, one GPU, one rollout worker.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": model_settings,
    "logger_config": {
        "wandb": wandb_settings,
    },
}

# NOTE(review): no `stop` argument is given; presumably the run continues until
# it is interrupted — confirm this is intended.
analysis = tune.run(
    PPOTrainer,
    config=ppo_config,
    loggers=[WandbLogger],
)



Trial name,status,loc
PPO_my_env_21374_00000,PENDING,


2021-10-06 14:10:16,231	INFO wandb.py:170 -- Already logged into W&B.
2021-10-06 14:10:16,240	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=175)[0m 2021-10-06 14:10:19,757	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=175)[0m 2021-10-06 14:10:19,757	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=175)[0m 2021-10-06 14:10:28,630	INFO trainable.py:109 -- Trainable.setup took 11.389 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-06_14-11-27
  done: false
  episode_len_mean: 966.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 0.5071485141913096
          entropy_coeff: 0.009999999999999998
          kl: 0.02036470083990674
          policy_loss: 0.017980292439460754
          total_loss: 0.0417120463318295
          vf_explained_var: 0.6013163924217224
          vf_loss: 0.024730300173784296
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.3.5
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,1,59.1954,1000,0,0,0,966


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-06_14-11-39
  done: false
  episode_len_mean: 873.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 0.7572787642478943
          entropy_coeff: 0.009999999999999998
          kl: 0.019120520803810513
          policy_loss: -0.14099260816971462
          total_loss: -0.09946962280405892
          vf_explained_var: 0.4845787584781647
          vf_loss: 0.04335961723700166
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.3.5
  num_healthy_work

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,2,70.9033,2000,0,0,0,873


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-06_14-11-52
  done: false
  episode_len_mean: 841.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 0.9105160368813409
          entropy_coeff: 0.009999999999999998
          kl: 0.014182283091645351
          policy_loss: 0.04532004056705369
          total_loss: 0.05650502944158183
          vf_explained_var: 0.7249862551689148
          vf_loss: 0.01603546527007388
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 192.168.3.5
  num_healthy_worker

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,3,83.5818,3000,0,0,0,841


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-06_14-12-05
  done: false
  episode_len_mean: 758.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 5
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.237285042471356
          entropy_coeff: 0.009999999999999998
          kl: 0.018882099107416374
          policy_loss: 0.030106200464069845
          total_loss: 0.03896068860259321
          vf_explained_var: 0.6835134029388428
          vf_loss: 0.0155627084016386
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.3.5
  num_healthy_workers

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,4,97.06,4000,0,0,0,758.2


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-06_14-12-19
  done: false
  episode_len_mean: 738.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.3801479564772712
          entropy_coeff: 0.009999999999999998
          kl: 0.006479052537750837
          policy_loss: -0.20445111791292828
          total_loss: -0.20725220864017804
          vf_explained_var: 0.5822589993476868
          vf_loss: 0.009056675348741312
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 192.168.3.5
  num_healthy_wor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,5,110.406,5000,0,0,0,738.5


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-06_14-12-32
  done: false
  episode_len_mean: 721.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 8
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.2484274956915113
          entropy_coeff: 0.009999999999999998
          kl: 0.0348012464928598
          policy_loss: 0.15753751479917102
          total_loss: 0.15988288621107738
          vf_explained_var: 0.3884495198726654
          vf_loss: 0.004389277400655879
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip: 192.168.3.5
  num_healthy_worker

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,6,123.351,6000,0,0,0,721.75


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-06_14-12-46
  done: false
  episode_len_mean: 699.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 10
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3315726942486232
          entropy_coeff: 0.009999999999999998
          kl: 0.014072222721292436
          policy_loss: -0.24840349265270764
          total_loss: -0.2501309122476313
          vf_explained_var: 0.9145650267601013
          vf_loss: 0.005255805984294663
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,7,137.33,7000,0,0,0,699.9


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-06_14-12-59
  done: false
  episode_len_mean: 687.4545454545455
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 11
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.5393257021903992
          entropy_coeff: 0.009999999999999998
          kl: 0.013605327564017718
          policy_loss: 0.04146591822306315
          total_loss: 0.03747817675272624
          vf_explained_var: 0.5638198852539062
          vf_loss: 0.005283113080076873
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,8,150.494,8000,0,0,0,687.455


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-06_14-13-13
  done: false
  episode_len_mean: 673.1538461538462
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 13
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.6309255401293437
          entropy_coeff: 0.009999999999999998
          kl: 0.01687483100464541
          policy_loss: 0.047973526186413235
          total_loss: 0.043344876170158385
          vf_explained_var: 0.3448779284954071
          vf_loss: 0.004086927717111798
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,9,164.844,9000,0,0,0,673.154


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-06_14-13-29
  done: false
  episode_len_mean: 661.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 15
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.6917937080065408
          entropy_coeff: 0.009999999999999998
          kl: 0.01106552777327337
          policy_loss: -0.06258109625842836
          total_loss: -0.07094924946626027
          vf_explained_var: 0.6892577409744263
          vf_loss: 0.0035702959259247616
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,10,180.183,10000,0,0,0,661.4


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-06_14-13-44
  done: false
  episode_len_mean: 646.9411764705883
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 17
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.573128412829505
          entropy_coeff: 0.009999999999999998
          kl: 0.006799255701171268
          policy_loss: -0.16901106304592556
          total_loss: -0.17974253661102718
          vf_explained_var: 0.8314281702041626
          vf_loss: 0.0019401410355284396
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,11,195.713,11000,0,0,0,646.941


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-06_14-13-57
  done: false
  episode_len_mean: 640.6111111111111
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 18
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7682346635394626
          entropy_coeff: 0.009999999999999998
          kl: 0.015775486535120897
          policy_loss: -0.029562983330753113
          total_loss: -0.03450597839223014
          vf_explained_var: 0.6856840252876282
          vf_loss: 0.005640376653496383
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,12,209.014,12000,0,0,0,640.611


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-06_14-14-11
  done: false
  episode_len_mean: 637.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 20
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.033910017543369
          entropy_coeff: 0.009999999999999998
          kl: 0.015038769193013174
          policy_loss: 0.04858702673680253
          total_loss: 0.038440572429034446
          vf_explained_var: 0.5137497186660767
          vf_loss: 0.0034252033504243527
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,13,222.313,13000,0,0,0,637.65


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-06_14-14-25
  done: false
  episode_len_mean: 632.0454545454545
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 22
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.0082857065730626
          entropy_coeff: 0.009999999999999998
          kl: 0.010726978662933343
          policy_loss: 0.025617263134982853
          total_loss: 0.016292735520336363
          vf_explained_var: 0.5362178683280945
          vf_loss: 0.005931187763861898
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,14,236.239,14000,0,0,0,632.045


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-06_14-14-38
  done: false
  episode_len_mean: 629.9130434782609
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7029116961691115
          entropy_coeff: 0.009999999999999998
          kl: 0.007257166093498629
          policy_loss: 0.03668283820152283
          total_loss: 0.02722620674305492
          vf_explained_var: 0.2606094777584076
          vf_loss: 0.004306760321681698
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,15,249.44,15000,0,0,0,629.913


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-06_14-14-51
  done: false
  episode_len_mean: 627.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 25
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8636412964926825
          entropy_coeff: 0.009999999999999998
          kl: 0.008762467419924865
          policy_loss: 0.014001313596963882
          total_loss: 0.0022644942419396507
          vf_explained_var: -0.14676326513290405
          vf_loss: 0.0029564852852167357
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,16,262.849,16000,0,0,0,627.92


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-06_14-15-05
  done: false
  episode_len_mean: 625.2962962962963
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 27
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9211766110526192
          entropy_coeff: 0.009999999999999998
          kl: 0.01160630989442579
          policy_loss: -0.04057098492566082
          total_loss: -0.05114153027534485
          vf_explained_var: 0.33848413825035095
          vf_loss: 0.003418383117241319
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore: 17
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,17,276.679,17000,0,0,0,625.296


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-06_14-15-17
  done: false
  episode_len_mean: 622.8571428571429
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7786972721417744
          entropy_coeff: 0.009999999999999998
          kl: 0.01038574451884641
          policy_loss: -0.09402968933184942
          total_loss: -0.09938083932631546
          vf_explained_var: 0.6572713851928711
          vf_loss: 0.007762235470969851
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,18,288.793,18000,0,0,0,622.857




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-06_14-15-45
  done: false
  episode_len_mean: 628.5333333333333
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 30
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.0220434374279446
          entropy_coeff: 0.009999999999999998
          kl: 0.002352285615261233
          policy_loss: 0.12548725762301022
          total_loss: 0.10655476500590642
          vf_explained_var: 0.005254317540675402
          vf_loss: 0.00022941265985233864
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,19,316.939,19000,0,0,0,628.533


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-06_14-16-04
  done: false
  episode_len_mean: 619.0625
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 32
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.415374745262994
          entropy_coeff: 0.009999999999999998
          kl: 0.010805874109853262
          policy_loss: 0.07235568546586567
          total_loss: 0.05318358954456118
          vf_explained_var: -0.18650773167610168
          vf_loss: 0.0025503262166037327
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,20,335.784,20000,0,0,0,619.062


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-06_14-16-23
  done: false
  episode_len_mean: 608.6764705882352
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 34
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3879616234037613
          entropy_coeff: 0.009999999999999998
          kl: 0.018679893227057844
          policy_loss: -0.028022249870830112
          total_loss: -0.044779184460639956
          vf_explained_var: 0.22471220791339874
          vf_loss: 0.002919705363132784
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,21,354.313,21000,0,0,0,608.676


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-06_14-16-43
  done: false
  episode_len_mean: 599.0555555555555
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 36
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3926669862535266
          entropy_coeff: 0.009999999999999998
          kl: 0.01506703068843272
          policy_loss: -0.008186367319689856
          total_loss: -0.026808388779560724
          vf_explained_var: 0.312719464302063
          vf_loss: 0.0019145689102717572
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,22,374.156,22000,0,0,0,599.056


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-06_14-17-01
  done: false
  episode_len_mean: 588.8974358974359
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 39
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4023157119750977
          entropy_coeff: 0.009999999999999998
          kl: 0.01362160603421055
          policy_loss: 0.010513169649574492
          total_loss: -0.009322771181662878
          vf_explained_var: -0.27893152832984924
          vf_loss: 0.0011223516582200925
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,23,392.339,23000,0,0,0,588.897


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-06_14-17-19
  done: false
  episode_len_mean: 582.8048780487804
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 41
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4834727419747247
          entropy_coeff: 0.009999999999999998
          kl: 0.011574483860775824
          policy_loss: -0.14559343732479546
          total_loss: -0.16671252734959124
          vf_explained_var: -0.4205456078052521
          vf_loss: 0.001111378644903501
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,24,410.459,24000,0,0,0,582.805


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-06_14-17-37
  done: false
  episode_len_mean: 577.4883720930233
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 43
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4518007066514755
          entropy_coeff: 0.009999999999999998
          kl: 0.012524637337005571
          policy_loss: -0.11472606667213969
          total_loss: -0.1340815024657382
          vf_explained_var: -0.9340353012084961
          vf_loss: 0.0023445265367627146
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,25,428.207,25000,0,0,0,577.488


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-06_14-17-53
  done: false
  episode_len_mean: 575.8222222222222
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 45
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.1440978712505765
          entropy_coeff: 0.009999999999999998
          kl: 0.018279019161902457
          policy_loss: 0.014767130878236558
          total_loss: 0.0004511662241485384
          vf_explained_var: -0.042492207139730453
          vf_loss: 0.0030122344201016756
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,26,444.211,26000,0,0,0,575.822


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-06_14-18-10
  done: false
  episode_len_mean: 573.7234042553191
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 47
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2811380439334448
          entropy_coeff: 0.009999999999999998
          kl: 0.010784412147657128
          policy_loss: -0.05398653737372822
          total_loss: -0.07198821165495449
          vf_explained_var: 0.019241265952587128
          vf_loss: 0.0023832121123430424
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,27,460.931,27000,0,0,0,573.723


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-06_14-18-26
  done: false
  episode_len_mean: 570.9591836734694
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 49
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.321840148501926
          entropy_coeff: 0.009999999999999998
          kl: 0.012037205824069997
          policy_loss: -0.09495010715391901
          total_loss: -0.11391078556577365
          vf_explained_var: -0.7240142226219177
          vf_loss: 0.0015493477526534762
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,28,476.844,28000,0,0,0,570.959


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-06_14-18-42
  done: false
  episode_len_mean: 570.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 50
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.1659639199574787
          entropy_coeff: 0.009999999999999998
          kl: 0.008767390166521673
          policy_loss: -0.15171853767500984
          total_loss: -0.17039725076821116
          vf_explained_var: -0.5867584943771362
          vf_loss: 0.0010082620923640207
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,29,493.265,29000,0,0,0,570


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-06_14-18-59
  done: false
  episode_len_mean: 565.377358490566
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 53
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.030163052346971
          entropy_coeff: 0.009999999999999998
          kl: 0.013700035027653396
          policy_loss: 0.038079239428043365
          total_loss: 0.02203517576886548
          vf_explained_var: -0.5337970852851868
          vf_loss: 0.0011750592099916603
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,30,510.521,30000,0,0,0,565.377


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-06_14-19-16
  done: false
  episode_len_mean: 563.290909090909
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 55
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.306532796223958
          entropy_coeff: 0.009999999999999998
          kl: 0.012281638065878024
          policy_loss: -0.036201330295039545
          total_loss: -0.05241681196623378
          vf_explained_var: -0.5271282196044922
          vf_loss: 0.004086479420463244
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore: 31
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,31,527.555,31000,0,0,0,563.291


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-06_14-19-33
  done: false
  episode_len_mean: 562.5535714285714
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.082824687163035
          entropy_coeff: 0.009999999999999998
          kl: 0.011409892622198055
          policy_loss: -0.0010770115587446425
          total_loss: -0.018179722792572444
          vf_explained_var: -0.048923835158348083
          vf_loss: 0.001158305869547702
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,32,544.554,32000,0,0,0,562.554


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-06_14-19-53
  done: false
  episode_len_mean: 558.4067796610169
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 59
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.276282024383545
          entropy_coeff: 0.009999999999999998
          kl: 0.010962610247752795
          policy_loss: -0.005573253871666061
          total_loss: -0.02327241376042366
          vf_explained_var: -0.10978514701128006
          vf_loss: 0.002597075110275505
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_restore: 33
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,33,563.765,33000,0,0,0,558.407




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-06_14-20-29
  done: false
  episode_len_mean: 554.0983606557377
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 61
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.364213373925951
          entropy_coeff: 0.009999999999999998
          kl: 0.014953473117437882
          policy_loss: -0.04507027682330873
          total_loss: -0.06397131317191654
          vf_explained_var: -0.9040290713310242
          vf_loss: 0.0013765647962120258
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 34
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,34,599.935,34000,0,0,0,554.098


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-06_14-20-48
  done: false
  episode_len_mean: 549.8571428571429
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 63
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.41789038711124
          entropy_coeff: 0.009999999999999998
          kl: 0.013813483555789654
          policy_loss: -0.016767474760611852
          total_loss: -0.03649651611016856
          vf_explained_var: -0.7549787163734436
          vf_loss: 0.0013418266666121782
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_restore: 35
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,35,619.214,35000,0,0,0,549.857


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-06_14-21-06
  done: false
  episode_len_mean: 545.030303030303
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 66
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2955459541744654
          entropy_coeff: 0.009999999999999998
          kl: 0.012959420608604036
          policy_loss: 0.002609030447072453
          total_loss: -0.015099832870894008
          vf_explained_var: -0.5157346725463867
          vf_loss: 0.00233072799940904
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,36,637.315,36000,0,0,0,545.03


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-06_14-21-25
  done: false
  episode_len_mean: 541.6323529411765
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 68
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3326867540677387
          entropy_coeff: 0.009999999999999998
          kl: 0.009617861664608679
          policy_loss: -0.004300108386410607
          total_loss: -0.02447380936808056
          vf_explained_var: -0.6420633792877197
          vf_loss: 0.0009891453102075806
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 37
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,37,656.234,37000,0,0,0,541.632


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-06_14-21-43
  done: false
  episode_len_mean: 538.6142857142858
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 70
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.357290572590298
          entropy_coeff: 0.009999999999999998
          kl: 0.014586195217466033
          policy_loss: -0.03261737231579092
          total_loss: -0.05126663521967
          vf_explained_var: -0.37463459372520447
          vf_loss: 0.0016417478961456152
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_restore: 38
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,38,674.179,38000,0,0,0,538.614


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-06_14-22-00
  done: false
  episode_len_mean: 536.8194444444445
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 72
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.302868376837836
          entropy_coeff: 0.009999999999999998
          kl: 0.016770324673748328
          policy_loss: -0.06713909726175997
          total_loss: -0.08446305936409368
          vf_explained_var: -0.4178003668785095
          vf_loss: 0.0019313968478753749
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,39,691.038,39000,0,0,0,536.819


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-06_14-22-18
  done: false
  episode_len_mean: 532.6533333333333
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 75
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2388808621300593
          entropy_coeff: 0.009999999999999998
          kl: 0.012639134931509312
          policy_loss: -0.04549748144216008
          total_loss: -0.06336293009420237
          vf_explained_var: 0.20213334262371063
          vf_loss: 0.0016795565031417128
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,40,709.034,40000,0,0,0,532.653


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-06_14-22-36
  done: false
  episode_len_mean: 530.8571428571429
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 77
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2847841554217867
          entropy_coeff: 0.009999999999999998
          kl: 0.013260609883498375
          policy_loss: 0.01716281846165657
          total_loss: -0.0015166004498799641
          vf_explained_var: 0.017092270776629448
          vf_loss: 0.0011847848793776292
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,41,727.098,41000,0,0,0,530.857


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-06_14-22-56
  done: false
  episode_len_mean: 527.746835443038
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 79
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3261839866638185
          entropy_coeff: 0.009999999999999998
          kl: 0.01624224440879733
          policy_loss: -0.08537489490376579
          total_loss: -0.10366679843929079
          vf_explained_var: -0.18625831604003906
          vf_loss: 0.0013154288113582878
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,42,746.825,42000,0,0,0,527.747


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-06_14-23-14
  done: false
  episode_len_mean: 525.5432098765432
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 81
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.364904114935133
          entropy_coeff: 0.009999999999999998
          kl: 0.014838121770841036
          policy_loss: -0.045731910939017933
          total_loss: -0.06443202466600471
          vf_explained_var: -0.5770139694213867
          vf_loss: 0.0016103483833527813
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,43,765.3,43000,0,0,0,525.543


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-06_14-23-32
  done: false
  episode_len_mean: 522.4404761904761
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 84
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.1176261782646177
          entropy_coeff: 0.009999999999999998
          kl: 0.012655571290237136
          policy_loss: -0.07568255298667484
          total_loss: -0.09316671085026529
          vf_explained_var: -0.15387170016765594
          vf_loss: 0.000844599854058793
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 44
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,44,782.832,44000,0,0,0,522.44


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-06_14-23-51
  done: false
  episode_len_mean: 520.7325581395348
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 86
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2112664116753473
          entropy_coeff: 0.009999999999999998
          kl: 0.012004902103658467
          policy_loss: -0.07485327091481951
          total_loss: -0.09126981182230844
          vf_explained_var: -0.8568789958953857
          vf_loss: 0.0029950198772389237
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,45,801.75,45000,0,0,0,520.733


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-06_14-24-10
  done: false
  episode_len_mean: 518.5568181818181
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 88
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.173849850230747
          entropy_coeff: 0.009999999999999998
          kl: 0.02085878316454885
          policy_loss: -0.04586854964080784
          total_loss: -0.06180076205896007
          vf_explained_var: 0.03841510787606239
          vf_loss: 0.0011130587657033984
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 46
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,46,820.86,46000,0,0,0,518.557




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-06_14-24-47
  done: false
  episode_len_mean: 514.5164835164835
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 91
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2923230780495536
          entropy_coeff: 0.009999999999999998
          kl: 0.013768339420960944
          policy_loss: -0.08760065183871323
          total_loss: -0.10472276426023908
          vf_explained_var: -0.8233001828193665
          vf_loss: 0.0011543036490264866
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,47,858.022,47000,0,0,0,514.516


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-06_14-25-05
  done: false
  episode_len_mean: 512.989247311828
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 93
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1808781147003176
          entropy_coeff: 0.009999999999999998
          kl: 0.011784553128113107
          policy_loss: 0.027052655402157042
          total_loss: 0.011412174627184867
          vf_explained_var: -0.8177082538604736
          vf_loss: 0.0021910164518178336
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,48,875.796,48000,0,0,0,512.989


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-06_14-25-23
  done: false
  episode_len_mean: 511.4947368421053
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 95
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.204087773958842
          entropy_coeff: 0.009999999999999998
          kl: 0.011901261408288056
          policy_loss: -0.060460909828543664
          total_loss: -0.07451910629040664
          vf_explained_var: -0.7502480149269104
          vf_loss: 0.0039660032618687386
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,49,893.962,49000,0,0,0,511.495


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-06_14-25-40
  done: false
  episode_len_mean: 509.7244897959184
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 98
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1857634411917792
          entropy_coeff: 0.009999999999999998
          kl: 0.013878724560874092
          policy_loss: -0.03707688897848129
          total_loss: -0.05208363106681241
          vf_explained_var: -0.60608971118927
          vf_loss: 0.002166821953142062
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,50,911.088,50000,0,0,0,509.724


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-06_14-25-58
  done: false
  episode_len_mean: 508.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 100
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.164301233821445
          entropy_coeff: 0.009999999999999998
          kl: 0.00854582910428873
          policy_loss: 0.029392323518792788
          total_loss: 0.012245290923035806
          vf_explained_var: -0.5702173709869385
          vf_loss: 0.0016117609668678293
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,51,928.452,51000,0,0,0,508.16


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-06_14-26-14
  done: false
  episode_len_mean: 500.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 102
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2526859203974405
          entropy_coeff: 0.009999999999999998
          kl: 0.012062967186520761
          policy_loss: 0.00011251237657335069
          total_loss: -0.01698901831275887
          vf_explained_var: -1.0
          vf_loss: 0.0013540760200056764
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52
  node_ip: 192.168.3.5
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,52,944.808,52000,0,0,0,500.67


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-06_14-26-30
  done: false
  episode_len_mean: 495.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 104
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.298818850517273
          entropy_coeff: 0.009999999999999998
          kl: 0.012481657720367719
          policy_loss: -0.06959351143903203
          total_loss: -0.08752974429064327
          vf_explained_var: -0.5838901400566101
          vf_loss: 0.0008393950959240707
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_restore: 53
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,53,961.099,53000,0,0,0,495.88


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-06_14-26-47
  done: false
  episode_len_mean: 493.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 106
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1999814907709756
          entropy_coeff: 0.009999999999999998
          kl: 0.013399046313698312
          policy_loss: -0.029844710230827333
          total_loss: -0.045759806202517614
          vf_explained_var: -0.560759425163269
          vf_loss: 0.0015625397926972559
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 54
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,54,977.736,54000,0,0,0,493.5


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-06_14-27-03
  done: false
  episode_len_mean: 489.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 108
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.179515798886617
          entropy_coeff: 0.009999999999999998
          kl: 0.010415570427012449
          policy_loss: 0.02132337594197856
          total_loss: 0.007981575632260905
          vf_explained_var: -0.80272376537323
          vf_loss: 0.004938100235611071
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 55
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,55,993.707,55000,0,0,0,489.94


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-06_14-27-20
  done: false
  episode_len_mean: 486.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 110
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2539958053165012
          entropy_coeff: 0.009999999999999998
          kl: 0.012417594525764346
          policy_loss: -0.0014058492249912685
          total_loss: -0.01820049877795908
          vf_explained_var: -0.6187347769737244
          vf_loss: 0.0015543701683378053
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_restore: 56
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,56,1011.12,56000,0,0,0,486.73


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-06_14-27-37
  done: false
  episode_len_mean: 484.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 112
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2351653046078153
          entropy_coeff: 0.009999999999999998
          kl: 0.010411954960599651
          policy_loss: -0.0555434246857961
          total_loss: -0.07273421006070244
          vf_explained_var: -0.41190120577812195
          vf_loss: 0.001646830111147008
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 57
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,57,1027.82,57000,0,0,0,484.44


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-06_14-27-55
  done: false
  episode_len_mean: 482.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 114
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.245934674474928
          entropy_coeff: 0.009999999999999998
          kl: 0.012523311945593734
          policy_loss: -0.0708002725823058
          total_loss: -0.0867017411109474
          vf_explained_var: -0.4623943269252777
          vf_loss: 0.0023312577039986434
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,58,1045.52,58000,0,0,0,482.66


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-06_14-28-12
  done: false
  episode_len_mean: 480.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 116
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.297883150312636
          entropy_coeff: 0.009999999999999998
          kl: 0.010132855197880571
          policy_loss: -0.05304202079359028
          total_loss: -0.07136777024716139
          vf_explained_var: -0.21610872447490692
          vf_loss: 0.0012332441886731733
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,59,1062.65,59000,0,0,0,480.71


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-06_14-28-29
  done: false
  episode_len_mean: 478.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 119
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2065973732206556
          entropy_coeff: 0.009999999999999998
          kl: 0.009205779578744928
          policy_loss: 0.040876177160276306
          total_loss: 0.025552483234140607
          vf_explained_var: -0.8208518028259277
          vf_loss: 0.003635329944599006
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,60,1080.06,60000,0,0,0,478.11




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-06_14-29-05
  done: false
  episode_len_mean: 474.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 121
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.210782119962904
          entropy_coeff: 0.009999999999999998
          kl: 0.013914061018620909
          policy_loss: -0.0525794956419203
          total_loss: -0.0675370781785912
          vf_explained_var: -0.4350879192352295
          vf_loss: 0.0024542418431438917
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,61,1115.39,61000,0,0,0,474.81


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-06_14-29-23
  done: false
  episode_len_mean: 472.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 123
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.260226527849833
          entropy_coeff: 0.009999999999999998
          kl: 0.012784510392571056
          policy_loss: -0.08002541483276421
          total_loss: -0.09591560941189528
          vf_explained_var: 0.04735171049833298
          vf_loss: 0.0023972962433213576
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_restore: 62
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,62,1133.62,62000,0,0,0,472.19


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-06_14-29-42
  done: false
  episode_len_mean: 468.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 125
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3036494493484496
          entropy_coeff: 0.009999999999999998
          kl: 0.009072005559401535
          policy_loss: 0.0010133017475406328
          total_loss: -0.01779507233036889
          vf_explained_var: -0.9826317429542542
          vf_loss: 0.0011663195599491397
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 63
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,63,1152.51,63000,0,0,0,468.71


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-06_14-30-00
  done: false
  episode_len_mean: 464.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 128
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2633219639460247
          entropy_coeff: 0.009999999999999998
          kl: 0.010870558291378203
          policy_loss: -0.025533304487665495
          total_loss: -0.04283280972805288
          vf_explained_var: -0.9067294597625732
          vf_loss: 0.0016649022465571762
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,64,1170.94,64000,0,0,0,464.7


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-06_14-30-17
  done: false
  episode_len_mean: 460.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 130
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.262018500434028
          entropy_coeff: 0.009999999999999998
          kl: 0.01256987745735024
          policy_loss: -0.017163740636573898
          total_loss: -0.033854976751738125
          vf_explained_var: 0.00913392473012209
          vf_loss: 0.001686617081415736
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_restore: 65
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,65,1187.79,65000,0,0,0,460.02


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-06_14-30-37
  done: false
  episode_len_mean: 458.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 132
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2744289610120987
          entropy_coeff: 0.009999999999999998
          kl: 0.014970757955841347
          policy_loss: -0.02121506511337227
          total_loss: -0.0377461899485853
          vf_explained_var: -0.42829030752182007
          vf_loss: 0.0011605346594781925
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,66,1207.75,66000,0,0,0,458.9


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-06_14-30-56
  done: false
  episode_len_mean: 458.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 134
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3086379554536607
          entropy_coeff: 0.009999999999999998
          kl: 0.013461410523445691
          policy_loss: -0.04299076713828577
          total_loss: -0.05870185926970509
          vf_explained_var: -0.36026525497436523
          vf_loss: 0.0028320579065216913
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_since_restore: 67
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,67,1226.07,67000,0,0,0,458.93


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-06_14-31-15
  done: false
  episode_len_mean: 458.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 137
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.309504606988695
          entropy_coeff: 0.009999999999999998
          kl: 0.013302460613304213
          policy_loss: -0.04964480337997278
          total_loss: -0.06717836211125056
          vf_explained_var: -0.6082040071487427
          vf_loss: 0.001071908403860612
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_since_restore: 68
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,68,1245.31,68000,0,0,0,458.23


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-06_14-31-34
  done: false
  episode_len_mean: 457.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 139
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.174455165863037
          entropy_coeff: 0.009999999999999998
          kl: 0.014611806583692497
          policy_loss: -0.09045104434092839
          total_loss: -0.10503003522753715
          vf_explained_var: -0.005307760555297136
          vf_loss: 0.002234076606368439
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_restore: 69
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,69,1264.49,69000,0,0,0,457.41


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-06_14-31-51
  done: false
  episode_len_mean: 456.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 141
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3049838410483465
          entropy_coeff: 0.009999999999999998
          kl: 0.011999864805120926
          policy_loss: 0.015151694065166844
          total_loss: -0.002031138249569469
          vf_explained_var: -0.5345748662948608
          vf_loss: 0.0018170525022368465
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 70
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,70,1281.65,70000,0,0,0,456.8


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-06_14-32-09
  done: false
  episode_len_mean: 457.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 143
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1558111270268756
          entropy_coeff: 0.009999999999999998
          kl: 0.01535775453611878
          policy_loss: -0.02641830180461208
          total_loss: -0.040415118200083575
          vf_explained_var: -0.18624000251293182
          vf_loss: 0.002378051785571087
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_restore: 71
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,71,1299.01,71000,0,0,0,457.44


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-06_14-32-27
  done: false
  episode_len_mean: 454.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 146
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3218122826682195
          entropy_coeff: 0.009999999999999998
          kl: 0.01256766959601272
          policy_loss: 0.048260841394464175
          total_loss: 0.03064615548484855
          vf_explained_var: -0.7175397872924805
          vf_loss: 0.0013618484993154804
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_restore: 72
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,72,1317.72,72000,0,0,0,454.99


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-06_14-32-46
  done: false
  episode_len_mean: 453.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 148
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.279278302192688
          entropy_coeff: 0.009999999999999998
          kl: 0.012370526666550526
          policy_loss: -0.10574529940883319
          total_loss: -0.12265685742927922
          vf_explained_var: -0.5146021246910095
          vf_loss: 0.0017061691582461612
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_restore: 73
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,73,1336.23,73000,0,0,0,453.46




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-06_14-33-22
  done: false
  episode_len_mean: 450.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 150
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2125047657224868
          entropy_coeff: 0.009999999999999998
          kl: 0.010364889193530314
          policy_loss: -0.03274722312473589
          total_loss: -0.050445713868571655
          vf_explained_var: -0.5462191104888916
          vf_loss: 0.0009284086719465752
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,74,1372.84,74000,0,0,0,450.88


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-06_14-33-41
  done: false
  episode_len_mean: 449.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 153
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3414266029993693
          entropy_coeff: 0.009999999999999998
          kl: 0.011211919712488763
          policy_loss: -0.025538456646932495
          total_loss: -0.043779262113902304
          vf_explained_var: -0.2473910003900528
          vf_loss: 0.001389438354332621
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_restore: 75
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,75,1391.78,75000,0,0,0,449.12


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-06_14-33-58
  done: false
  episode_len_mean: 448.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 155
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1882214360766943
          entropy_coeff: 0.009999999999999998
          kl: 0.01217207338010697
          policy_loss: 0.0009710455934206645
          total_loss: -0.015173502018054326
          vf_explained_var: -0.9320875406265259
          vf_loss: 0.0016295925300154421
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_restore: 76
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,76,1408.79,76000,0,0,0,448.73


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-06_14-34-15
  done: false
  episode_len_mean: 448.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 157
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1291398829884
          entropy_coeff: 0.009999999999999998
          kl: 0.011636820810970998
          policy_loss: -0.012898222470862999
          total_loss: -0.02858266444462869
          vf_explained_var: -0.5012930631637573
          vf_loss: 0.0016795295743375189
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_restore: 77
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,77,1425.27,77000,0,0,0,448.24


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-06_14-34-34
  done: false
  episode_len_mean: 448.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 159
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.218181037902832
          entropy_coeff: 0.009999999999999998
          kl: 0.01341784562254548
          policy_loss: -0.08980967932277256
          total_loss: -0.10558321424242523
          vf_explained_var: -0.850307047367096
          vf_loss: 0.0018797515775077045
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since_restore: 78
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,78,1443.99,78000,0,0,0,448.17


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-06_14-34-52
  done: false
  episode_len_mean: 448.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 161
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.391434375445048
          entropy_coeff: 0.009999999999999998
          kl: 0.013128162112786316
          policy_loss: -0.022643980818490186
          total_loss: -0.04095953293144703
          vf_explained_var: -0.5883063673973083
          vf_loss: 0.0011680376243829312
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_restore: 79
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,79,1462.38,79000,0,0,0,448.12


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-06_14-35-10
  done: false
  episode_len_mean: 449.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 163
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3750210205713906
          entropy_coeff: 0.009999999999999998
          kl: 0.010508286609777226
          policy_loss: -0.003525549504492018
          total_loss: -0.02274986079169644
          vf_explained_var: -0.9996708631515503
          vf_loss: 0.000979351446342965
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,80,1480.45,80000,0,0,0,449.06


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-06_14-35-27
  done: false
  episode_len_mean: 450.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 165
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1773183080885143
          entropy_coeff: 0.009999999999999998
          kl: 0.01501757690437964
          policy_loss: -0.1617034246524175
          total_loss: -0.1765613739689191
          vf_explained_var: 0.13014277815818787
          vf_loss: 0.001846800649461026
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_restore: 81
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,81,1496.94,81000,0,0,0,450.81


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-06_14-35-45
  done: false
  episode_len_mean: 451.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 168
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2433457295099895
          entropy_coeff: 0.009999999999999998
          kl: 0.010948023276296166
          policy_loss: -0.09057746984892422
          total_loss: -0.10598767151435216
          vf_explained_var: -0.5489946603775024
          vf_loss: 0.0033282958921821166
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_restore: 82
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,82,1514.94,82000,0,0,0,451.25


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-06_14-36-01
  done: false
  episode_len_mean: 452.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 169
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1493202884991964
          entropy_coeff: 0.009999999999999998
          kl: 0.015346623217862346
          policy_loss: -0.005156778295834859
          total_loss: -0.01972512797349029
          vf_explained_var: -0.4760540723800659
          vf_loss: 0.0017453676193124718
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore: 83
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,83,1530.75,83000,0,0,0,452.32


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-06_14-36-18
  done: false
  episode_len_mean: 453.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 172
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0935402592023213
          entropy_coeff: 0.009999999999999998
          kl: 0.012793774082070384
          policy_loss: 0.03730097450315952
          total_loss: 0.022322470405035547
          vf_explained_var: -0.2819552421569824
          vf_loss: 0.0016389997941183133
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore: 84
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,84,1547.97,84000,0,0,0,453.45


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-06_14-36-34
  done: false
  episode_len_mean: 455.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 173
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.220693924691942
          entropy_coeff: 0.009999999999999998
          kl: 0.012937244788909898
          policy_loss: -0.11842137525478999
          total_loss: -0.1337782039410538
          vf_explained_var: -0.32547527551651
          vf_loss: 0.0024837897003938754
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_restore: 85
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,85,1563.74,85000,0,0,0,455.09


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-06_14-36-51
  done: false
  episode_len_mean: 455.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 176
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2617243236965603
          entropy_coeff: 0.009999999999999998
          kl: 0.013986885505918966
          policy_loss: -0.11271899744040437
          total_loss: -0.12897504303190443
          vf_explained_var: -0.4634079337120056
          vf_loss: 0.001640620860012455
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_restore: 86
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,86,1581.25,86000,0,0,0,455.87


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-06_14-37-09
  done: false
  episode_len_mean: 456.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 178
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.138553094863892
          entropy_coeff: 0.009999999999999998
          kl: 0.013886788812780597
          policy_loss: -0.0345614335189263
          total_loss: -0.04983204570081499
          vf_explained_var: -1.0
          vf_loss: 0.0014281248589718921
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_restore: 87
  node_ip: 192.168.3.5
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,87,1599.53,87000,0,0,0,456.43




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-06_14-37-45
  done: false
  episode_len_mean: 455.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 180
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2965694506963095
          entropy_coeff: 0.009999999999999998
          kl: 0.012813300758249666
          policy_loss: -0.06463656131592062
          total_loss: -0.08176885823615723
          vf_explained_var: -0.6290180087089539
          vf_loss: 0.0015089103473453886
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,88,1634.76,88000,0,0,0,455.88


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-06_14-38-03
  done: false
  episode_len_mean: 455.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 182
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.23397888607449
          entropy_coeff: 0.009999999999999998
          kl: 0.015943360009293566
          policy_loss: -0.051412911257810064
          total_loss: -0.06646213755011558
          vf_explained_var: -0.8304919600486755
          vf_loss: 0.0019096770453163319
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_restore: 89
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,89,1652.78,89000,0,0,0,455.83


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-06_14-38-20
  done: false
  episode_len_mean: 456.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 185
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2407259040408665
          entropy_coeff: 0.009999999999999998
          kl: 0.011932234647159411
          policy_loss: -0.05839931014925241
          total_loss: -0.07543425396498707
          vf_explained_var: -0.9110748171806335
          vf_loss: 0.0013451884112631281
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 90
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,90,1670.41,90000,0,0,0,456.25


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-06_14-38-39
  done: false
  episode_len_mean: 455.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 187
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2763402541478475
          entropy_coeff: 0.009999999999999998
          kl: 0.011605149491409625
          policy_loss: -0.059434059510628384
          total_loss: -0.0770717040532165
          vf_explained_var: -0.7551144957542419
          vf_loss: 0.0012090178374718462
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_restore: 91
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,91,1688.84,91000,0,0,0,455.93


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-06_14-38-56
  done: false
  episode_len_mean: 455.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 189
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.248442437913683
          entropy_coeff: 0.009999999999999998
          kl: 0.011735029692419217
          policy_loss: 0.022656976824833287
          total_loss: 0.0059732073297103245
          vf_explained_var: -0.9996225833892822
          vf_loss: 0.001840081545459624
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_since_restore: 92
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,92,1706.1,92000,0,0,0,455.91


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-06_14-39-12
  done: false
  episode_len_mean: 457.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 191
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.177664271990458
          entropy_coeff: 0.009999999999999998
          kl: 0.010118302807141117
          policy_loss: -0.09747256934642792
          total_loss: -0.11452404004004267
          vf_explained_var: -0.3371618986129761
          vf_loss: 0.0013102450834897657
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_restore: 93
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,93,1721.9,93000,0,0,0,457.67


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-06_14-39-28
  done: false
  episode_len_mean: 459.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 193
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2130037705103556
          entropy_coeff: 0.009999999999999998
          kl: 0.015577218766152433
          policy_loss: -0.023505234842499098
          total_loss: -0.03848656684988075
          vf_explained_var: 0.1344163715839386
          vf_loss: 0.0018913917243480682
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 94
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,94,1738.36,94000,0,0,0,459.8


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-06_14-39-44
  done: false
  episode_len_mean: 460.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 195
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.172675559255812
          entropy_coeff: 0.009999999999999998
          kl: 0.012660730609803843
          policy_loss: -0.029406818913088904
          total_loss: -0.04521373990509245
          vf_explained_var: -0.9895788431167603
          vf_loss: 0.0016468360127570728
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_restore: 95
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,95,1754.47,95000,0,0,0,460.8


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-06_14-40-00
  done: false
  episode_len_mean: 462.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 197
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.168949267599318
          entropy_coeff: 0.009999999999999998
          kl: 0.009105063643593227
          policy_loss: 0.0008460729072491328
          total_loss: -0.016488761641085148
          vf_explained_var: -0.8362541198730469
          vf_loss: 0.001281697592154766
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore: 96
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,96,1769.97,96000,0,0,0,462.68


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-06_14-40-16
  done: false
  episode_len_mean: 463.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 199
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2301124572753905
          entropy_coeff: 0.009999999999999998
          kl: 0.013519666971414778
          policy_loss: -0.03482242917848958
          total_loss: -0.05076894907073842
          vf_explained_var: -0.9688199162483215
          vf_loss: 0.0017917163592452805
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_restore: 97
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,97,1785.94,97000,0,0,0,463.92


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-06_14-40-32
  done: false
  episode_len_mean: 463.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 201
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.214715846379598
          entropy_coeff: 0.009999999999999998
          kl: 0.014436478659876626
          policy_loss: -0.032895842318733534
          total_loss: -0.048679857949415845
          vf_explained_var: -0.8472967147827148
          vf_loss: 0.001490830888117974
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_restore: 98
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,98,1802.42,98000,0,0,0,463.75


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-06_14-40-50
  done: false
  episode_len_mean: 464.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 203
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2004836055967543
          entropy_coeff: 0.009999999999999998
          kl: 0.011110009985713414
          policy_loss: -0.04417083896696568
          total_loss: -0.06119958547254403
          vf_explained_var: -0.619084358215332
          vf_loss: 0.0012264606135431676
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_restore: 99
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,99,1819.61,99000,0,0,0,464.28


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-06_14-41-07
  done: false
  episode_len_mean: 463.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 205
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1462151765823365
          entropy_coeff: 0.009999999999999998
          kl: 0.013774605113417987
          policy_loss: 0.028213853078583876
          total_loss: 0.012718661046690411
          vf_explained_var: -0.7859753370285034
          vf_loss: 0.0013180306006688625
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,100,1837.39,100000,0,0,0,463.58


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-06_14-41-27
  done: false
  episode_len_mean: 461.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 208
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1164684189690486
          entropy_coeff: 0.009999999999999998
          kl: 0.015144936443746322
          policy_loss: -0.010661742008394665
          total_loss: -0.025531425658199523
          vf_explained_var: -0.9418252110481262
          vf_loss: 0.0011835848899661666
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,101,1856.83,101000,0,0,0,461.41




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-06_14-42-00
  done: false
  episode_len_mean: 461.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 210
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.023369548055861
          entropy_coeff: 0.009999999999999998
          kl: 0.012599653125545146
          policy_loss: -0.04191623373577992
          total_loss: -0.05676527033663458
          vf_explained_var: -1.0
          vf_loss: 0.0011322746764765017
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,102,1890.33,102000,0,0,0,461.38


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-06_14-42-17
  done: false
  episode_len_mean: 463.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 212
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1037011437945896
          entropy_coeff: 0.009999999999999998
          kl: 0.014356402763997798
          policy_loss: -0.07432575180298752
          total_loss: -0.08941909625298447
          vf_explained_var: -0.4512487053871155
          vf_loss: 0.0010983810727743224
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,103,1906.64,103000,0,0,0,463.12


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-06_14-42-33
  done: false
  episode_len_mean: 463.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 214
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1146015326182046
          entropy_coeff: 0.009999999999999998
          kl: 0.015018884662280721
          policy_loss: -0.03651810301881697
          total_loss: -0.050631789780325356
          vf_explained_var: -0.8525304794311523
          vf_loss: 0.0019634534617782467
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,104,1923.29,104000,0,0,0,463.33


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-06_14-42-54
  done: false
  episode_len_mean: 461.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 216
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.030696678161621
          entropy_coeff: 0.009999999999999998
          kl: 0.016428791813042923
          policy_loss: -0.008682504461871254
          total_loss: -0.020621332050197654
          vf_explained_var: -1.0
          vf_loss: 0.002823420918624227
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,105,1943.79,105000,0,0,0,461.52


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-06_14-43-09
  done: false
  episode_len_mean: 461.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 218
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1084663775232104
          entropy_coeff: 0.009999999999999998
          kl: 0.012353388392143156
          policy_loss: -0.034215447430809336
          total_loss: -0.049308751482102606
          vf_explained_var: -0.647167980670929
          vf_loss: 0.0018220888922870573
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,106,1958.94,106000,0,0,0,461.16


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-06_14-43-26
  done: false
  episode_len_mean: 462.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 220
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2147815386454264
          entropy_coeff: 0.009999999999999998
          kl: 0.01159210754501721
          policy_loss: -0.06568854157295492
          total_loss: -0.08142132156838973
          vf_explained_var: -0.91539067029953
          vf_loss: 0.0025026990618142817
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,107,1975.3,107000,0,0,0,462.41


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-06_14-43-41
  done: false
  episode_len_mean: 463.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 222
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.230794514550103
          entropy_coeff: 0.009999999999999998
          kl: 0.010767944803126555
          policy_loss: -0.02272097049281001
          total_loss: -0.03997972996181084
          vf_explained_var: -0.8024856448173523
          vf_loss: 0.0014150042752994019
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,108,1990.42,108000,0,0,0,463.96


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-06_14-43-56
  done: false
  episode_len_mean: 465.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 224
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.045667682753669
          entropy_coeff: 0.009999999999999998
          kl: 0.010294158130852556
          policy_loss: -0.08829964784284433
          total_loss: -0.10415485238449441
          vf_explained_var: -0.9894804954528809
          vf_loss: 0.0011271928922117998
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,109,2005.77,109000,0,0,0,465.75


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-06_14-44-13
  done: false
  episode_len_mean: 466.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 226
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1493347353405423
          entropy_coeff: 0.009999999999999998
          kl: 0.012050547453964612
          policy_loss: -0.09249902203058204
          total_loss: -0.10772469863295556
          vf_explained_var: -1.0
          vf_loss: 0.0022006089398120014
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,110,2022.99,110000,0,0,0,466.65


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-06_14-44-29
  done: false
  episode_len_mean: 467.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 228
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1765519645478992
          entropy_coeff: 0.009999999999999998
          kl: 0.01127484576661397
          policy_loss: -0.06601722261144055
          total_loss: -0.08213262909816371
          vf_explained_var: -1.0
          vf_loss: 0.0018448510517676672
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,111,2038.95,111000,0,0,0,467.64


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-06_14-44-45
  done: false
  episode_len_mean: 468.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 230
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1137907160653007
          entropy_coeff: 0.009999999999999998
          kl: 0.012085161146310636
          policy_loss: -0.05797718343221479
          total_loss: -0.07338635846972466
          vf_explained_var: -0.8581089377403259
          vf_loss: 0.0016499908472825257
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,112,2055.1,112000,0,0,0,468.02


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-06_14-45-01
  done: false
  episode_len_mean: 470.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 232
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0785913308461508
          entropy_coeff: 0.009999999999999998
          kl: 0.012038686228544983
          policy_loss: -0.07258203716741668
          total_loss: -0.0876965146097872
          vf_explained_var: -1.0
          vf_loss: 0.0016083799483668474
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,113,2070.31,113000,0,0,0,470.39


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-06_14-45-17
  done: false
  episode_len_mean: 471.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 234
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.072985009352366
          entropy_coeff: 0.009999999999999998
          kl: 0.011381302727191825
          policy_loss: -0.0638427403031124
          total_loss: -0.0789082329099377
          vf_explained_var: -0.7495878338813782
          vf_loss: 0.001823168739469515
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,114,2086.67,114000,0,0,0,471.39


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-06_14-45-33
  done: false
  episode_len_mean: 472.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 236
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1714448478486803
          entropy_coeff: 0.009999999999999998
          kl: 0.011807922156435606
          policy_loss: -0.06624793853196832
          total_loss: -0.0823870016882817
          vf_explained_var: -1.0
          vf_loss: 0.0015902118084745276
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,115,2102.54,115000,0,0,0,472.64


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-06_14-45-50
  done: false
  episode_len_mean: 473.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 238
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0205370201004875
          entropy_coeff: 0.009999999999999998
          kl: 0.013717436601165861
          policy_loss: -0.05815521101984713
          total_loss: -0.07205952944027053
          vf_explained_var: -0.7761900424957275
          vf_loss: 0.0016714170497531692
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,116,2119.9,116000,0,0,0,473.97




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-06_14-46-24
  done: false
  episode_len_mean: 474.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 240
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0046339604589676
          entropy_coeff: 0.009999999999999998
          kl: 0.016265575673774145
          policy_loss: -0.058983074418372576
          total_loss: -0.07154465586774879
          vf_explained_var: -1.0
          vf_loss: 0.0019951257324363624
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,117,2153.34,117000,0,0,0,474.41


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-06_14-46-41
  done: false
  episode_len_mean: 474.14
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 242
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1038234260347153
          entropy_coeff: 0.009999999999999998
          kl: 0.013872002866591534
          policy_loss: -0.08110035091845526
          total_loss: -0.09608647383542525
          vf_explained_var: -0.9992802739143372
          vf_loss: 0.0013703092583455146
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,118,2170.8,118000,0,0,0,474.14


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-06_14-46-59
  done: false
  episode_len_mean: 475.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 245
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0383077250586616
          entropy_coeff: 0.009999999999999998
          kl: 0.013826126016902633
          policy_loss: -0.025374649630652533
          total_loss: -0.03976141061219904
          vf_explained_var: -0.9480652809143066
          vf_loss: 0.001329998072469607
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,119,2188.3,119000,0,0,0,475.2


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-06_14-47-14
  done: false
  episode_len_mean: 476.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 247
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0574903938505384
          entropy_coeff: 0.009999999999999998
          kl: 0.011986341859278148
          policy_loss: -0.06517844498157502
          total_loss: -0.07984317830867238
          vf_explained_var: -0.705316424369812
          vf_loss: 0.0018647794003805352
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,120,2203.83,120000,0,0,0,476.04


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-06_14-47-34
  done: false
  episode_len_mean: 476.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 249
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0215391569667394
          entropy_coeff: 0.009999999999999998
          kl: 0.013571061993774603
          policy_loss: -0.06309663511605726
          total_loss: -0.07741633014132579
          vf_explained_var: -0.9968013167381287
          vf_loss: 0.0013154641304734267
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,121,2223.31,121000,0,0,0,476.3


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-06_14-47-54
  done: false
  episode_len_mean: 475.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 252
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9869281159506904
          entropy_coeff: 0.009999999999999998
          kl: 0.007596515365932848
          policy_loss: -0.12651936933398247
          total_loss: -0.1423713580601745
          vf_explained_var: -0.5487685203552246
          vf_loss: 0.0014534680036983143
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,122,2243.95,122000,0,0,0,475.74


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-06_14-48-14
  done: false
  episode_len_mean: 474.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 254
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.866599726676941
          entropy_coeff: 0.009999999999999998
          kl: 0.01146309807988241
          policy_loss: -0.03532946478161547
          total_loss: -0.04844040796160698
          vf_explained_var: -0.9422371983528137
          vf_loss: 0.001686254082273485
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,123,2263.94,123000,0,0,0,474.76


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-06_14-48-36
  done: false
  episode_len_mean: 470.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 257
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6172933922873602
          entropy_coeff: 0.009999999999999998
          kl: 0.016022174401140375
          policy_loss: -0.10977666088276439
          total_loss: -0.11598870919810401
          vf_explained_var: -0.3366422653198242
          vf_loss: 0.004553403966646228
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,124,2285.64,124000,0,0,0,470.49


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-06_14-48-58
  done: false
  episode_len_mean: 468.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 260
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6332072218259175
          entropy_coeff: 0.009999999999999998
          kl: 0.014375702326088004
          policy_loss: -0.005763823456234402
          total_loss: -0.01582788568403986
          vf_explained_var: -0.10792546719312668
          vf_loss: 0.0014162184670567513
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,125,2307.75,125000,0,0,0,468.05


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-06_14-49-19
  done: false
  episode_len_mean: 466.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 262
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.911447775363922
          entropy_coeff: 0.009999999999999998
          kl: 0.011452408421139202
          policy_loss: -0.039003903542955715
          total_loss: -0.05295069159732924
          vf_explained_var: -0.9986703395843506
          vf_loss: 0.001302498187093685
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,126,2328.31,126000,0,0,0,466.3


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-06_14-49-39
  done: false
  episode_len_mean: 463.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 265
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3782996204164293
          entropy_coeff: 0.009999999999999998
          kl: 0.013008882018731575
          policy_loss: 0.07596377581357956
          total_loss: 0.06804253955682119
          vf_explained_var: -0.23255503177642822
          vf_loss: 0.0014712595503725526
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,127,2348.23,127000,0,0,0,463.85


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-06_14-49-55
  done: false
  episode_len_mean: 464.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 266
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.837842082977295
          entropy_coeff: 0.009999999999999998
          kl: 0.012461721754999171
          policy_loss: -0.06399207313855489
          total_loss: -0.07614485323429107
          vf_explained_var: -0.4911079406738281
          vf_loss: 0.0020198110542777514
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,128,2364.5,128000,0,0,0,464.62


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-06_14-50-17
  done: false
  episode_len_mean: 462.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 269
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9129462242126465
          entropy_coeff: 0.009999999999999998
          kl: 0.014798905718050313
          policy_loss: -0.06310250047180388
          total_loss: -0.07506484331356154
          vf_explained_var: -0.7098937630653381
          vf_loss: 0.0021724864054704085
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,129,2385.93,129000,0,0,0,462.54




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-06_14-50-54
  done: false
  episode_len_mean: 459.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 272
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8757422102822199
          entropy_coeff: 0.009999999999999998
          kl: 0.018138005630186013
          policy_loss: -0.00015947425531016456
          total_loss: -0.011363971398936377
          vf_explained_var: 0.20793242752552032
          vf_loss: 0.0014313499585518407
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,130,2423.66,130000,0,0,0,459.54


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-06_14-51-19
  done: false
  episode_len_mean: 456.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 274
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.399029016494751
          entropy_coeff: 0.009999999999999998
          kl: 0.00799323413422953
          policy_loss: -0.008726393514209323
          total_loss: -0.017944432215558158
          vf_explained_var: -0.04851456731557846
          vf_loss: 0.002074537011018644
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,131,2448.74,131000,0,0,0,456


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-06_14-51-41
  done: false
  episode_len_mean: 454.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 277
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4879621399773493
          entropy_coeff: 0.009999999999999998
          kl: 0.012081752864804838
          policy_loss: -0.04145595093982087
          total_loss: -0.05093408841639757
          vf_explained_var: -0.6816033124923706
          vf_loss: 0.0013238948663153375
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,132,2470.6,132000,0,0,0,454.21


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-06_14-52-02
  done: false
  episode_len_mean: 454.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 279
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6371657027138604
          entropy_coeff: 0.009999999999999998
          kl: 0.011048554525283806
          policy_loss: 0.0012429546978738573
          total_loss: -0.009530220553278923
          vf_explained_var: -0.9631701111793518
          vf_loss: 0.0018695948050460882
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,133,2491.5,133000,0,0,0,454.35


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-06_14-52-23
  done: false
  episode_len_mean: 455.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 281
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7068028383784823
          entropy_coeff: 0.009999999999999998
          kl: 0.016507324504460876
          policy_loss: -0.037407032152016954
          total_loss: -0.04606042479475339
          vf_explained_var: -0.45688918232917786
          vf_loss: 0.002843413225814907
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,134,2512.34,134000,0,0,0,455.29


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-06_14-52-46
  done: false
  episode_len_mean: 452.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 284
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.472089589966668
          entropy_coeff: 0.009999999999999998
          kl: 0.008475856804149787
          policy_loss: -0.1439684406750732
          total_loss: -0.154070759150717
          vf_explained_var: -0.2847822606563568
          vf_loss: 0.0017579745397799545
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,135,2535.53,135000,0,0,0,452.89


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-06_14-53-06
  done: false
  episode_len_mean: 454.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 286
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3542534430821738
          entropy_coeff: 0.009999999999999998
          kl: 0.012043805903389283
          policy_loss: -0.10440951594048076
          total_loss: -0.11246676668524742
          vf_explained_var: 0.32548731565475464
          vf_loss: 0.0014204992104269979
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,136,2555.32,136000,0,0,0,454.01


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-06_14-53-31
  done: false
  episode_len_mean: 452.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 289
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5874063664012485
          entropy_coeff: 0.009999999999999998
          kl: 0.012779967644060067
          policy_loss: -0.08847716661791007
          total_loss: -0.0987074277881119
          vf_explained_var: -0.3985958993434906
          vf_loss: 0.0013305643030131857
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,137,2580.06,137000,0,0,0,452.87


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-06_14-53-56
  done: false
  episode_len_mean: 448.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 292
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.724783064259423
          entropy_coeff: 0.009999999999999998
          kl: 0.006358739219283214
          policy_loss: 0.03637071359488699
          total_loss: 0.022664523724880484
          vf_explained_var: 0.04925107955932617
          vf_loss: 0.0013955657609686669
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,138,2605.36,138000,0,0,0,448.07


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-06_14-54-23
  done: false
  episode_len_mean: 445.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 294
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6381484164132012
          entropy_coeff: 0.009999999999999998
          kl: 0.009339463385675689
          policy_loss: -0.012728391836086908
          total_loss: -0.024825343489646913
          vf_explained_var: -0.07765395194292068
          vf_loss: 0.001132462499339858
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,139,2631.82,139000,0,0,0,445.42


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-06_14-54-47
  done: false
  episode_len_mean: 440.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 297
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7711309353510538
          entropy_coeff: 0.009999999999999998
          kl: 0.012232273323728924
          policy_loss: -0.03044374403026369
          total_loss: -0.04243993312120438
          vf_explained_var: -0.3969481885433197
          vf_loss: 0.0015867270393452296
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,140,2655.74,140000,0,0,0,440.06




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-06_14-55-27
  done: false
  episode_len_mean: 436.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 300
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6869488067097134
          entropy_coeff: 0.009999999999999998
          kl: 0.011704860670499508
          policy_loss: -0.0512522681719727
          total_loss: -0.062469559576776294
          vf_explained_var: -0.6770594716072083
          vf_loss: 0.001701806432619277
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,141,2696.28,141000,0,0,0,436.96


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-06_14-55-51
  done: false
  episode_len_mean: 431.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 303
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7389071570502388
          entropy_coeff: 0.009999999999999998
          kl: 0.015707038230547997
          policy_loss: -0.08193500886360804
          total_loss: -0.09070974712570508
          vf_explained_var: 0.10086729377508163
          vf_loss: 0.0033132124407630826
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,142,2720.12,142000,0,0,0,431.66


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-06_14-56-10
  done: false
  episode_len_mean: 430.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 305
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.755229942003886
          entropy_coeff: 0.009999999999999998
          kl: 0.011913681761513119
          policy_loss: -0.12109130608538786
          total_loss: -0.13328852421707577
          vf_explained_var: -0.2881658673286438
          vf_loss: 0.0013342144258785993
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,143,2738.8,143000,0,0,0,430.35


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-06_14-56-32
  done: false
  episode_len_mean: 429.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 308
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.88028451734119
          entropy_coeff: 0.009999999999999998
          kl: 0.010704362296559071
          policy_loss: -0.07196408982078234
          total_loss: -0.08555707770089309
          vf_explained_var: -0.7299872636795044
          vf_loss: 0.0015971386016139554
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 144
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,144,2761.11,144000,0,0,0,429.17


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-06_14-56-55
  done: false
  episode_len_mean: 424.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 311
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8851404203308952
          entropy_coeff: 0.009999999999999998
          kl: 0.010911105241107676
          policy_loss: -0.04493506734466387
          total_loss: -0.05818288474757638
          vf_explained_var: -0.5636374354362488
          vf_loss: 0.0019210875745759243
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_since_restore: 145
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,145,2784.29,145000,0,0,0,424.41


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-06_14-57-18
  done: false
  episode_len_mean: 420.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 314
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7133526417944167
          entropy_coeff: 0.009999999999999998
          kl: 0.018090838639147063
          policy_loss: -0.035643704815043344
          total_loss: 0.043590672893656625
          vf_explained_var: -0.428718239068985
          vf_loss: 0.09026224770479732
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_since_restore: 146
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,146,2806.61,146000,-0.03,0,-3,420.34


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-06_14-57-40
  done: false
  episode_len_mean: 420.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 316
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6687975181473627
          entropy_coeff: 0.009999999999999998
          kl: 0.012038084204451453
          policy_loss: -0.07941989170180427
          total_loss: -0.07818241599533293
          vf_explained_var: 0.208331897854805
          vf_loss: 0.013862597468929986
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_since_restore: 147
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,147,2829.28,147000,-0.03,0,-3,420.45


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-06_14-58-03
  done: false
  episode_len_mean: 415.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 319
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0600599911477833
          entropy_coeff: 0.009999999999999998
          kl: 0.015593187673464135
          policy_loss: 0.03413295253283448
          total_loss: 0.020973727396792836
          vf_explained_var: -0.4830680787563324
          vf_loss: 0.0021786720453140637
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 148
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,148,2852.41,148000,-0.03,0,-3,415.56


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-06_14-58-27
  done: false
  episode_len_mean: 410.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 322
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.93890952401691
          entropy_coeff: 0.009999999999999998
          kl: 0.013152144781511924
          policy_loss: -0.12639451051751774
          total_loss: -0.13818638631039196
          vf_explained_var: 0.003930139355361462
          vf_loss: 0.0031583713794437547
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_since_restore: 149
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,149,2875.73,149000,-0.03,0,-3,410.82


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-06_14-58-50
  done: false
  episode_len_mean: 404.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 325
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9246088676982456
          entropy_coeff: 0.009999999999999998
          kl: 0.013562904761444294
          policy_loss: -0.020920328319900566
          total_loss: -0.032891102424926225
          vf_explained_var: -0.37057816982269287
          vf_loss: 0.0026978313004494543
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 150
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,150,2898.84,150000,-0.03,0,-3,404.71


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-06_14-59-12
  done: false
  episode_len_mean: 401.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 328
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.036651113298204
          entropy_coeff: 0.009999999999999998
          kl: 0.01576610774994478
          policy_loss: -0.011168494075536727
          total_loss: -0.023722980419794717
          vf_explained_var: -0.26734137535095215
          vf_loss: 0.0024909606953668925
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_since_restore: 151
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,151,2921.02,151000,-0.03,0,-3,401.48




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-06_14-59-54
  done: false
  episode_len_mean: 396.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 331
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.06594408220715
          entropy_coeff: 0.009999999999999998
          kl: 0.010509627343751483
          policy_loss: -0.021367360113395586
          total_loss: -0.03631191274358166
          vf_explained_var: -0.6210172772407532
          vf_loss: 0.0021678876946680248
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 152
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,152,2962.59,152000,-0.03,0,-3,396.48


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-06_15-00-13
  done: false
  episode_len_mean: 392.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 334
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1786076333787707
          entropy_coeff: 0.009999999999999998
          kl: 0.0111237690106679
          policy_loss: -0.05789300257133113
          total_loss: -0.07400280978116724
          vf_explained_var: -0.32139432430267334
          vf_loss: 0.0019219993426102316
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_since_restore: 153
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,153,2982.25,153000,-0.03,0,-3,392.07


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-06_15-00-36
  done: false
  episode_len_mean: 387.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 337
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.047853276464674
          entropy_coeff: 0.009999999999999998
          kl: 0.012211253588384076
          policy_loss: -0.01611838150355551
          total_loss: -0.03105200682249334
          vf_explained_var: -0.4872833788394928
          vf_loss: 0.00142361085745506
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_since_restore: 154
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,154,3004.79,154000,-0.03,0,-3,387.34


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-06_15-00-57
  done: false
  episode_len_mean: 385.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 339
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2217971404393513
          entropy_coeff: 0.009999999999999998
          kl: 0.012454569411420695
          policy_loss: -0.060727354677187075
          total_loss: -0.07733131378061242
          vf_explained_var: -1.0
          vf_loss: 0.0014105927419020897
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_since_restore: 155
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,155,3026.28,155000,-0.03,0,-3,385.44


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-06_15-01-18
  done: false
  episode_len_mean: 383.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 342
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.182830391989814
          entropy_coeff: 0.009999999999999998
          kl: 0.014233085766576378
          policy_loss: -0.046628349812494384
          total_loss: -0.06187774216135343
          vf_explained_var: -0.6052535176277161
          vf_loss: 0.0017752420849218551
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_since_restore: 156
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,156,3047.03,156000,-0.03,0,-3,383.36


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-06_15-01-42
  done: false
  episode_len_mean: 378.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 345
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.162757831149631
          entropy_coeff: 0.009999999999999998
          kl: 0.012327829148335804
          policy_loss: 0.07727342065837649
          total_loss: 0.06124446400337749
          vf_explained_var: -0.3168719410896301
          vf_loss: 0.0014379772655148473
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations_since_restore: 157
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,157,3070.53,157000,-0.03,0,-3,378.71


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-06_15-02-01
  done: false
  episode_len_mean: 376.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 347
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1722494496239557
          entropy_coeff: 0.009999999999999998
          kl: 0.014121234567506422
          policy_loss: -0.10701605168481668
          total_loss: -0.12211683872673247
          vf_explained_var: -0.040504589676856995
          vf_loss: 0.001855789285360111
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations_since_restore: 158
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,158,3089.45,158000,-0.03,0,-3,376.44


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-06_15-02-22
  done: false
  episode_len_mean: 374.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 350
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1853133890363905
          entropy_coeff: 0.009999999999999998
          kl: 0.01485090329184852
          policy_loss: -0.0527657262980938
          total_loss: -0.06721455018139548
          vf_explained_var: -0.45185890793800354
          vf_loss: 0.0023921277994910875
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_since_restore: 159
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,159,3111.21,159000,-0.03,0,-3,374.05


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-06_15-02-43
  done: false
  episode_len_mean: 375.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 353
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.166139531135559
          entropy_coeff: 0.009999999999999998
          kl: 0.011852927165676909
          policy_loss: -0.0631925054722362
          total_loss: -0.0795747987098164
          vf_explained_var: -0.3551309406757355
          vf_loss: 0.0012787395150452437
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 160
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,160,3131.68,160000,-0.03,0,-3,375.24


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-06_15-03-06
  done: false
  episode_len_mean: 373.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 356
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1608250591490004
          entropy_coeff: 0.009999999999999998
          kl: 0.012949723226899025
          policy_loss: -0.014807979224456681
          total_loss: -0.031125796462098756
          vf_explained_var: -0.5401312112808228
          vf_loss: 0.0009199001703463081
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_since_restore: 161
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,161,3154.22,161000,-0.03,0,-3,373.59


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-06_15-03-25
  done: false
  episode_len_mean: 374.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 358
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1298155784606934
          entropy_coeff: 0.009999999999999998
          kl: 0.008389102401163168
          policy_loss: 0.011129713762137625
          total_loss: -0.006227450031373236
          vf_explained_var: -1.0
          vf_loss: 0.0011096705119901648
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_since_restore: 162
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,162,3173.49,162000,-0.03,0,-3,374.84




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-06_15-04-01
  done: false
  episode_len_mean: 376.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 360
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.719845023420122
          entropy_coeff: 0.009999999999999998
          kl: 0.013772020644890127
          policy_loss: -0.10151028869052728
          total_loss: -0.1117662955282463
          vf_explained_var: 0.1113848015666008
          vf_loss: 0.0022943877589164507
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_since_restore: 163
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,163,3210.08,163000,-0.03,0,-3,376.97


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-06_15-04-24
  done: false
  episode_len_mean: 375.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 363
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1295008182525637
          entropy_coeff: 0.009999999999999998
          kl: 0.013619678548268866
          policy_loss: -0.05180545722444852
          total_loss: -0.06719897670878304
          vf_explained_var: -0.4841125011444092
          vf_loss: 0.0013048472600833824
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_since_restore: 164
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,164,3232.96,164000,-0.03,0,-3,375.78


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-06_15-04-48
  done: false
  episode_len_mean: 372.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 366
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1420669343736436
          entropy_coeff: 0.009999999999999998
          kl: 0.012879907194232557
          policy_loss: -0.09139517032437855
          total_loss: -0.10718897899819745
          vf_explained_var: -0.7593534588813782
          vf_loss: 0.001279891839173312
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterations_since_restore: 165
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,165,3256.29,165000,-0.03,0,-3,372.6


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-06_15-05-09
  done: false
  episode_len_mean: 370.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 369
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0327259845203822
          entropy_coeff: 0.009999999999999998
          kl: 0.011068144495516978
          policy_loss: -0.03367552167425553
          total_loss: -0.04865365983504388
          vf_explained_var: -0.8252345323562622
          vf_loss: 0.001613623609430053
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 166
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,166,3277.61,166000,-0.03,0,-3,370.96


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-06_15-05-31
  done: false
  episode_len_mean: 369.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 372
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.817118227481842
          entropy_coeff: 0.009999999999999998
          kl: 0.012185015072334653
          policy_loss: 0.044052204800148806
          total_loss: 0.031174012252853975
          vf_explained_var: -0.13943174481391907
          vf_loss: 0.0011805459446299614
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations_since_restore: 167
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,167,3299.86,167000,-0.03,0,-3,369.73


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-06_15-05-55
  done: false
  episode_len_mean: 368.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 375
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9600609527693855
          entropy_coeff: 0.009999999999999998
          kl: 0.01242908364764435
          policy_loss: -0.12052845545113086
          total_loss: -0.13490830798529915
          vf_explained_var: -0.5337414741516113
          vf_loss: 0.0010259395394112086
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 168
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,168,3323.77,168000,-0.03,0,-3,368.61


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-06_15-06-18
  done: false
  episode_len_mean: 366.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 378
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9266511029667324
          entropy_coeff: 0.009999999999999998
          kl: 0.009308524213167413
          policy_loss: 0.004791793951557742
          total_loss: -0.010605439460939831
          vf_explained_var: -0.9335291385650635
          vf_loss: 0.0007276497739237837
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations_since_restore: 169
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,169,3346.68,169000,-0.03,0,-3,366.91


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-06_15-06-40
  done: false
  episode_len_mean: 365.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 380
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0186809685495164
          entropy_coeff: 0.009999999999999998
          kl: 0.015353557891928047
          policy_loss: -0.05748223505086369
          total_loss: -0.07118922571341196
          vf_explained_var: -0.7982043027877808
          vf_loss: 0.001297993248509657
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 170
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,170,3368.58,170000,-0.03,0,-3,365.83


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-06_15-07-04
  done: false
  episode_len_mean: 362.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 383
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.008729174402025
          entropy_coeff: 0.009999999999999998
          kl: 0.008757525852345991
          policy_loss: -0.08760210817886724
          total_loss: -0.10412505873375469
          vf_explained_var: -0.43498852849006653
          vf_loss: 0.0006086749765219995
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_since_restore: 171
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,171,3392.17,171000,-0.03,0,-3,362.71


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-06_15-07-27
  done: false
  episode_len_mean: 360.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 386
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8677106685108609
          entropy_coeff: 0.009999999999999998
          kl: 0.017720651084615034
          policy_loss: -0.029729729725254908
          total_loss: -0.04048621513777309
          vf_explained_var: -0.2872082591056824
          vf_loss: 0.0019399014631441484
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 172
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,172,3415.47,172000,-0.03,0,-3,360.01


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-06_15-07-49
  done: false
  episode_len_mean: 359.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 389
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8596998122003343
          entropy_coeff: 0.009999999999999998
          kl: 0.013038551383537664
          policy_loss: -0.01023402073317104
          total_loss: -0.023478357411093182
          vf_explained_var: -0.5584292411804199
          vf_loss: 0.0009521489995273037
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterations_since_restore: 173
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,173,3437.05,173000,-0.03,0,-3,359.22




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-06_15-08-30
  done: false
  episode_len_mean: 358.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 392
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7988107535574172
          entropy_coeff: 0.009999999999999998
          kl: 0.015104037971724004
          policy_loss: -0.09996020570397376
          total_loss: -0.11151375489102469
          vf_explained_var: -0.529030442237854
          vf_loss: 0.0013369450345635415
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_since_restore: 174
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,174,3478.72,174000,-0.03,0,-3,358.84


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-06_15-08-53
  done: false
  episode_len_mean: 358.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 395
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8955735312567816
          entropy_coeff: 0.009999999999999998
          kl: 0.020797005392699162
          policy_loss: -0.058331556887262395
          total_loss: -0.06844342346820566
          vf_explained_var: -0.42725682258605957
          vf_loss: 0.0018248793636707382
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterations_since_restore: 175
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,175,3501.56,175000,-0.03,0,-3,358.99


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-06_15-09-17
  done: false
  episode_len_mean: 357.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 398
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9827707886695862
          entropy_coeff: 0.009999999999999998
          kl: 0.010315581545711754
          policy_loss: -0.04425957024925285
          total_loss: -0.058091499283909796
          vf_explained_var: -1.0
          vf_loss: 0.0007735147438425985
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 176
  node_ip: 192.168.3.5
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,176,3525.11,176000,-0.03,0,-3,357.8


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-06_15-09-41
  done: false
  episode_len_mean: 357.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 401
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9438893755276998
          entropy_coeff: 0.009999999999999998
          kl: 0.01283284994277425
          policy_loss: -0.03298403273026149
          total_loss: -0.0449607118136353
          vf_explained_var: -0.6310948133468628
          vf_loss: 0.0009655820335158044
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iterations_since_restore: 177
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,177,3548.97,177000,-0.03,0,-3,357.53


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-06_15-10-05
  done: false
  episode_len_mean: 357.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 404
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8571387900246514
          entropy_coeff: 0.009999999999999998
          kl: 0.00783505822157085
          policy_loss: -0.00899026518066724
          total_loss: -0.022456053189105457
          vf_explained_var: 0.06570732593536377
          vf_loss: 0.0011390999622461903
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations_since_restore: 178
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,178,3573.54,178000,-0.03,0,-3,357.07


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-06_15-10-29
  done: false
  episode_len_mean: 355.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 406
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9234975179036458
          entropy_coeff: 0.009999999999999998
          kl: 0.009836641185118003
          policy_loss: -0.07394873723387718
          total_loss: -0.0866894110209412
          vf_explained_var: -0.7271720170974731
          vf_loss: 0.001514499693358731
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iterations_since_restore: 179
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,179,3597.04,179000,-0.03,0,-3,355.11


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-06_15-10-50
  done: false
  episode_len_mean: 356.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 409
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.668787243631151
          entropy_coeff: 0.009999999999999998
          kl: 0.013056370521414746
          policy_loss: -0.055780418962240216
          total_loss: -0.06442020196053717
          vf_explained_var: -0.21967129409313202
          vf_loss: 0.0014383029609840983
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 180
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,180,3618.44,180000,-0.03,0,-3,356.31


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-06_15-11-10
  done: false
  episode_len_mean: 357.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 412
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6958423150910271
          entropy_coeff: 0.009999999999999998
          kl: 0.008862532313620126
          policy_loss: -0.05279140066769388
          total_loss: -0.06380320315559705
          vf_explained_var: -0.5682177543640137
          vf_loss: 0.0014599652566377901
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 181
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,181,3638.34,181000,-0.03,0,-3,357.09


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-06_15-11-29
  done: false
  episode_len_mean: 359.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 414
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8522044155332777
          entropy_coeff: 0.009999999999999998
          kl: 0.00955412172872428
          policy_loss: -0.03760365636812316
          total_loss: -0.049153011271523106
          vf_explained_var: -0.42821282148361206
          vf_loss: 0.0021359154249593203
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_since_restore: 182
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,182,3657.71,182000,0,0,0,359.08


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-06_15-11-51
  done: false
  episode_len_mean: 359.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 417
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9508200301064385
          entropy_coeff: 0.009999999999999998
          kl: 0.01230279447511461
          policy_loss: -0.04571242295205593
          total_loss: -0.05716635990473959
          vf_explained_var: -0.11575336754322052
          vf_loss: 0.0018259704720953274
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iterations_since_restore: 183
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,183,3679.17,183000,0,0,0,359.84


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-06_15-12-09
  done: false
  episode_len_mean: 361.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 419
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8354124400350782
          entropy_coeff: 0.009999999999999998
          kl: 0.01212716038027766
          policy_loss: -0.04402380400440759
          total_loss: -0.05444001755159762
          vf_explained_var: -0.2498028427362442
          vf_loss: 0.001798534448284449
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 184
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,184,3697.74,184000,0,0,0,361.43




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-06_15-12-46
  done: false
  episode_len_mean: 363.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 421
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.989713924460941
          entropy_coeff: 0.009999999999999998
          kl: 0.01031682144116175
          policy_loss: -0.09854390803310606
          total_loss: -0.11197478050986925
          vf_explained_var: -0.3486597239971161
          vf_loss: 0.001243374336303936
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iterations_since_restore: 185
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,185,3734.65,185000,0,0,0,363.64


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-06_15-13-07
  done: false
  episode_len_mean: 365.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 424
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6039399438434176
          entropy_coeff: 0.009999999999999998
          kl: 0.0027077998955776414
          policy_loss: -0.14131515125433605
          total_loss: -0.15402036060889562
          vf_explained_var: 0.24631722271442413
          vf_loss: 0.0019633650178244957
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_since_restore: 186
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,186,3755.15,186000,-0.03,0,-3,365.78


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-06_15-13-26
  done: false
  episode_len_mean: 368.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 426
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9392181396484376
          entropy_coeff: 0.009999999999999998
          kl: 0.016829887127788642
          policy_loss: -0.037724780498279466
          total_loss: -0.05064839344057772
          vf_explained_var: -0.47739362716674805
          vf_loss: 0.002208499684800497
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_since_restore: 187
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,187,3774.1,187000,-0.03,0,-3,368.06


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-06_15-13-45
  done: false
  episode_len_mean: 369.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 428
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7638010263442994
          entropy_coeff: 0.009999999999999998
          kl: 0.017021256002899145
          policy_loss: -0.027939970253242387
          total_loss: -0.0333556368533108
          vf_explained_var: 0.3493032157421112
          vf_loss: 0.007913839040944974
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterations_since_restore: 188
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,188,3792.71,188000,-0.03,0,-3,369.69


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-06_15-14-02
  done: false
  episode_len_mean: 372.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 430
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7845696104897393
          entropy_coeff: 0.009999999999999998
          kl: 0.012223102028673752
          policy_loss: -0.11415083055487937
          total_loss: -0.12708195220265123
          vf_explained_var: -0.35912686586380005
          vf_loss: 0.0018206004842391444
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterations_since_restore: 189
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,189,3810.48,189000,-0.03,0,-3,372.26


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-06_15-14-21
  done: false
  episode_len_mean: 375.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 432
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 2.0130669289165075
          entropy_coeff: 0.009999999999999998
          kl: 0.011900796560085346
          policy_loss: 0.006978114280435774
          total_loss: 0.0315433735648791
          vf_explained_var: -0.42403602600097656
          vf_loss: 0.04168354282155633
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 190
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,190,3829,190000,-0.08,0,-5,375.69


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-06_15-14-39
  done: false
  episode_len_mean: 376.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 434
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.686013850900862
          entropy_coeff: 0.009999999999999998
          kl: 0.014417790998703791
          policy_loss: -0.03516579104794396
          total_loss: -0.04188737140761482
          vf_explained_var: 0.5206788182258606
          vf_loss: 0.006489053939003497
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  iterations_since_restore: 191
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,191,3846.97,191000,-0.08,0,-5,376.88


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-06_15-14-55
  done: false
  episode_len_mean: 380.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 436
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.552247620953454
          entropy_coeff: 0.009999999999999998
          kl: 0.014357457594032917
          policy_loss: 0.05601380674375428
          total_loss: 0.04914367637700505
          vf_explained_var: 0.34849119186401367
          vf_loss: 0.005018113994608737
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 192
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,192,3862.65,192000,-0.08,0,-5,380.33


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-06_15-15-13
  done: false
  episode_len_mean: 382.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 438
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9525668091244168
          entropy_coeff: 0.009999999999999998
          kl: 0.01694052910390786
          policy_loss: -0.040208646613690585
          total_loss: -0.05255649644467566
          vf_explained_var: -0.5546086430549622
          vf_loss: 0.002889747519253029
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  iterations_since_restore: 193
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,193,3881.39,193000,-0.08,0,-5,382.29


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-06_15-15-33
  done: false
  episode_len_mean: 384.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 441
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8615676919619242
          entropy_coeff: 0.009999999999999998
          kl: 0.015569476489244179
          policy_loss: -0.11874061189591885
          total_loss: -0.13166276684237851
          vf_explained_var: 0.3092082738876343
          vf_loss: 0.0017524983045101787
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterations_since_restore: 194
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,194,3900.92,194000,-0.08,0,-5,384.64


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-06_15-15-53
  done: false
  episode_len_mean: 385.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 443
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7897493309444852
          entropy_coeff: 0.009999999999999998
          kl: 0.018175695455049972
          policy_loss: -0.08487277788420518
          total_loss: -0.09317674297425482
          vf_explained_var: 0.07351525872945786
          vf_loss: 0.0049928092184321335
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  iterations_since_restore: 195
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,195,3921.31,195000,-0.08,0,-5,385.76


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-06_15-16-14
  done: false
  episode_len_mean: 387.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 446
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9683954649501376
          entropy_coeff: 0.009999999999999998
          kl: 0.015308410485286252
          policy_loss: 0.019012845390372807
          total_loss: 0.004625326808955934
          vf_explained_var: -0.6120924353599548
          vf_loss: 0.0014214950639547573
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 196
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,196,3941.97,196000,-0.08,0,-5,387.53


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-06_15-16-33
  done: false
  episode_len_mean: 388.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 448
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.747755049334632
          entropy_coeff: 0.009999999999999998
          kl: 0.02099083869114484
          policy_loss: -0.00227513187047508
          total_loss: -0.01213143900450733
          vf_explained_var: -0.8848100304603577
          vf_loss: 0.002307937054946605
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  iterations_since_restore: 197
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,197,3961.07,197000,-0.08,0,-5,388.05




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-06_15-17-12
  done: false
  episode_len_mean: 388.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 451
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.79244884385003
          entropy_coeff: 0.009999999999999998
          kl: 0.01651359657574629
          policy_loss: -0.07197956846406063
          total_loss: -0.0811955826357007
          vf_explained_var: -0.44770148396492004
          vf_loss: 0.0024384679434458828
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iterations_since_restore: 198
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,198,4000.47,198000,-0.08,0,-5,388


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-06_15-17-31
  done: false
  episode_len_mean: 389.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 453
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.796681527296702
          entropy_coeff: 0.009999999999999998
          kl: 0.013244854785041997
          policy_loss: -0.05391899368001355
          total_loss: -0.06556187127199438
          vf_explained_var: -0.08302871882915497
          vf_loss: 0.00129503295950902
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  iterations_since_restore: 199
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,199,4019.24,199000,-0.08,0,-5,389.34


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-06_15-17-51
  done: false
  episode_len_mean: 390.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 455
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8934671070840623
          entropy_coeff: 0.009999999999999998
          kl: 0.014288168465628262
          policy_loss: 0.10875184966458215
          total_loss: 0.09628962646755908
          vf_explained_var: -0.7462552189826965
          vf_loss: 0.0010474091877565822
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 200
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,200,4039.36,200000,-0.08,0,-5,390.79


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-06_15-18-10
  done: false
  episode_len_mean: 392.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 458
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.5346080621083578
          entropy_coeff: 0.009999999999999998
          kl: 0.010317741738702797
          policy_loss: -0.20470884999053346
          total_loss: -0.21395584086163177
          vf_explained_var: -0.37259531021118164
          vf_loss: 0.0021815764083940948
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  iterations_since_restore: 201
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,201,4058.21,201000,-0.08,0,-5,392.78


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-06_15-18-29
  done: false
  episode_len_mean: 391.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 460
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9121108068360222
          entropy_coeff: 0.009999999999999998
          kl: 0.013251229695353325
          policy_loss: -0.015445235081844859
          total_loss: -0.028128819995456272
          vf_explained_var: -0.5699512362480164
          vf_loss: 0.0014061961432970647
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iterations_since_restore: 202
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,202,4077.24,202000,-0.08,0,-5,391.03


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-06_15-18-50
  done: false
  episode_len_mean: 391.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 462
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7282828503184848
          entropy_coeff: 0.009999999999999998
          kl: 0.0014920266397209325
          policy_loss: -0.0843162778351042
          total_loss: -0.0998087477352884
          vf_explained_var: 0.3195388913154602
          vf_loss: 0.001223854862877892
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iterations_since_restore: 203
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,203,4098.27,203000,-0.08,0,-5,391.99


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-06_15-19-12
  done: false
  episode_len_mean: 394.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 465
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9717708561155531
          entropy_coeff: 0.009999999999999998
          kl: 0.009461863397696332
          policy_loss: -0.04107375236021148
          total_loss: -0.04952573006351789
          vf_explained_var: -0.42398884892463684
          vf_loss: 0.009469455456645745
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iterations_since_restore: 204
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,204,4120,204000,-0.1,0,-5,394.15


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-06_15-19-33
  done: false
  episode_len_mean: 395.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 467
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7572815736134848
          entropy_coeff: 0.009999999999999998
          kl: 0.03143558698254129
          policy_loss: -0.03408896674712499
          total_loss: -0.04214694698651632
          vf_explained_var: 0.3005373775959015
          vf_loss: 0.003546980822445928
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iterations_since_restore: 205
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,205,4141.23,205000,-0.1,0,-5,395.22


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-06_15-19-53
  done: false
  episode_len_mean: 394.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 470
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9947074717945523
          entropy_coeff: 0.009999999999999998
          kl: 0.012912568625011625
          policy_loss: -0.07890326413843367
          total_loss: -0.09408770973483721
          vf_explained_var: 0.04421741142868996
          vf_loss: 0.0010855722865219125
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterations_since_restore: 206
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,206,4160.98,206000,-0.1,0,-5,394.25


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-06_15-20-13
  done: false
  episode_len_mean: 396.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 472
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6949232074949476
          entropy_coeff: 0.009999999999999998
          kl: 0.006762512648192577
          policy_loss: -0.05499947526388698
          total_loss: -0.0028721228034959898
          vf_explained_var: 0.3596898317337036
          vf_loss: 0.06715085351671506
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  iterations_since_restore: 207
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,207,4180.92,207000,-0.1,0,-5,396.88


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-06_15-20-34
  done: false
  episode_len_mean: 400.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 475
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5236819465955098
          entropy_coeff: 0.009999999999999998
          kl: 0.010743749902852241
          policy_loss: 0.033764152394400705
          total_loss: 0.04338355130619473
          vf_explained_var: 0.13710154592990875
          vf_loss: 0.021796771645959882
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 208
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,208,4201.3,208000,-0.14,0,-5,400.51


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-06_15-20-52
  done: false
  episode_len_mean: 403.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 477
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.463067384560903
          entropy_coeff: 0.009999999999999998
          kl: 0.006483986061035946
          policy_loss: -0.03546879589557648
          total_loss: 0.025003352926837072
          vf_explained_var: 0.1543092578649521
          vf_loss: 0.07325640870258213
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  iterations_since_restore: 209
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,209,4219.85,209000,-0.15,0,-5,403.26


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-06_15-21-11
  done: false
  episode_len_mean: 404.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 479
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.549419429567125
          entropy_coeff: 0.009999999999999998
          kl: 0.00741301395875554
          policy_loss: -0.012538059376594093
          total_loss: 0.05989722694373793
          vf_explained_var: 0.5337805151939392
          vf_loss: 0.0858185073464281
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore: 210
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,210,4238.86,210000,-0.15,0,-5,404.64




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-06_15-21-50
  done: false
  episode_len_mean: 405.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 482
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6047101153267755
          entropy_coeff: 0.009999999999999998
          kl: 0.012698690646814172
          policy_loss: -0.11856550532910559
          total_loss: -0.11349004689190123
          vf_explained_var: 0.5027313828468323
          vf_loss: 0.01750641026948061
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_since_restore: 211
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,211,4277.83,211000,-0.15,0,-5,405.06


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-06_15-22-12
  done: false
  episode_len_mean: 405.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 484
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3907375819153256
          entropy_coeff: 0.009999999999999998
          kl: 0.012569698925534596
          policy_loss: -0.07466514205767048
          total_loss: 0.008990140424834358
          vf_explained_var: 0.7858733534812927
          vf_loss: 0.09398324142417146
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  iterations_since_restore: 212
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,212,4299.7,212000,-0.19,0,-5,405.48


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-06_15-22-31
  done: false
  episode_len_mean: 407.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 486
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3622781104511685
          entropy_coeff: 0.009999999999999998
          kl: 0.0122329896085403
          policy_loss: -0.04006696738716629
          total_loss: 0.11766472104936838
          vf_explained_var: 0.6333124041557312
          vf_loss: 0.1678709359218677
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  iterations_since_restore: 213
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,213,4318.8,213000,-0.22,0,-5,407.32


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-06_15-22-49
  done: false
  episode_len_mean: 411.68
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 488
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2320400502946642
          entropy_coeff: 0.009999999999999998
          kl: 0.010511391934735768
          policy_loss: -0.040071031699577965
          total_loss: 0.0635955804751979
          vf_explained_var: 0.44597527384757996
          vf_loss: 0.1129937297768063
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  iterations_since_restore: 214
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,214,4336.77,214000,-0.2,2,-5,411.68


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-06_15-23-05
  done: false
  episode_len_mean: 413.41
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.22
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 490
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5503473957379659
          entropy_coeff: 0.009999999999999998
          kl: 0.012963450359157481
          policy_loss: -0.025762749815152752
          total_loss: -0.011513589612311787
          vf_explained_var: 0.20142440497875214
          vf_loss: 0.02606108703960975
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iterations_since_restore: 215
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,215,4353.02,215000,-0.22,2,-5,413.41


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-06_15-23-22
  done: false
  episode_len_mean: 417.46
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.19
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 492
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5841576682196723
          entropy_coeff: 0.009999999999999998
          kl: 0.012166248229907634
          policy_loss: -0.06672520024908914
          total_loss: -0.021548920538690355
          vf_explained_var: 0.48568305373191833
          vf_loss: 0.05755333417198724
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterations_since_restore: 216
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,216,4369.75,216000,-0.19,3,-5,417.46


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-06_15-23-40
  done: false
  episode_len_mean: 420.47
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.19
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 494
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5899519907103645
          entropy_coeff: 0.009999999999999998
          kl: 0.010026027014908939
          policy_loss: 0.053962827887800005
          total_loss: 0.08107474599447516
          vf_explained_var: 0.3919687569141388
          vf_loss: 0.040156370013331374
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iterations_since_restore: 217
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,217,4387.63,217000,-0.19,3,-5,420.47


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-06_15-24-00
  done: false
  episode_len_mean: 422.52
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -0.08
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 497
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7774225804540846
          entropy_coeff: 0.009999999999999998
          kl: 0.013633027029045083
          policy_loss: -0.0811694189078278
          total_loss: 0.1258425972941849
          vf_explained_var: 0.3179531395435333
          vf_loss: 0.22090402436442674
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  iterations_since_restore: 218
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,218,4407.44,218000,-0.08,7,-5,422.52


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-06_15-24-17
  done: false
  episode_len_mean: 425.55
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 499
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7420897483825684
          entropy_coeff: 0.009999999999999998
          kl: 0.013185399828090914
          policy_loss: -0.0325418084859848
          total_loss: 0.022454662269188297
          vf_explained_var: 0.12477654218673706
          vf_loss: 0.06866261667520222
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iterations_since_restore: 219
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,219,4424.08,219000,-0.05,7,-5,425.55


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-06_15-24-32
  done: false
  episode_len_mean: 430.24
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 501
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.639753778775533
          entropy_coeff: 0.009999999999999998
          kl: 0.015545712419586818
          policy_loss: -0.14424142978257604
          total_loss: -0.12060673911538389
          vf_explained_var: 0.3256220817565918
          vf_loss: 0.03560534461090962
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 220
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,220,4439.45,220000,-0.05,7,-5,430.24


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-06_15-24-49
  done: false
  episode_len_mean: 433.62
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 503
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5162927826245627
          entropy_coeff: 0.009999999999999998
          kl: 0.007872283811132124
          policy_loss: -0.20067011424236827
          total_loss: -0.2009511356552442
          vf_explained_var: 0.436064749956131
          vf_loss: 0.012640150642902073
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  iterations_since_restore: 221
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,221,4456.76,221000,-0.09,7,-5,433.62


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-06_15-25-06
  done: false
  episode_len_mean: 434.05
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 504
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6802881757418315
          entropy_coeff: 0.009999999999999998
          kl: 0.012400327702364006
          policy_loss: -0.12851264207727378
          total_loss: -0.13313227776024078
          vf_explained_var: 0.6871883869171143
          vf_loss: 0.008652059273380372
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iterations_since_restore: 222
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,222,4473.84,222000,-0.09,7,-5,434.05


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-06_15-25-25
  done: false
  episode_len_mean: 438.29
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -0.09
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 507
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5498856557740106
          entropy_coeff: 0.009999999999999998
          kl: 0.013620741687699405
          policy_loss: -0.02073767234881719
          total_loss: -0.02501654616660542
          vf_explained_var: 0.5831881761550903
          vf_loss: 0.00734126056647963
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  iterations_since_restore: 223
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,223,4492.13,223000,-0.09,7,-5,438.29


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-06_15-25-42
  done: false
  episode_len_mean: 440.79
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 509
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6992681980133058
          entropy_coeff: 0.009999999999999998
          kl: 0.010857915732754304
          policy_loss: -0.10277600238720576
          total_loss: 0.020994133833381865
          vf_explained_var: 0.21076057851314545
          vf_loss: 0.13767085713851784
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 224
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,224,4509.02,224000,-0.05,7,-5,440.79




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-06_15-26-17
  done: false
  episode_len_mean: 441.57
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 511
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6564648800426058
          entropy_coeff: 0.009999999999999998
          kl: 0.02182026878309434
          policy_loss: -0.003712109559112125
          total_loss: 0.0016702586577998267
          vf_explained_var: 0.31537920236587524
          vf_loss: 0.015733355307020247
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterations_since_restore: 225
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,225,4544.38,225000,-0.05,7,-5,441.57


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-06_15-26-38
  done: false
  episode_len_mean: 441.33
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: -0.01
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 514
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.6993258264329698
          entropy_coeff: 0.009999999999999998
          kl: 0.013083689704287435
          policy_loss: -0.06510227918624878
          total_loss: 0.06478564333584573
          vf_explained_var: 0.43406447768211365
          vf_loss: 0.1412925072428253
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations_since_restore: 226
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,226,4565.25,226000,-0.01,7,-5,441.33


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-06_15-26-53
  done: false
  episode_len_mean: 443.11
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.02
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 515
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.5194018218252394
          entropy_coeff: 0.009999999999999998
          kl: 0.00834522217123735
          policy_loss: -0.1524125191072623
          total_loss: -0.11177187727557289
          vf_explained_var: 0.7351833581924438
          vf_loss: 0.052270011878055
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterations_since_restore: 227
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,227,4580.05,227000,0.02,7,-5,443.11


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-06_15-27-12
  done: false
  episode_len_mean: 444.2
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.02
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 518
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.602643566661411
          entropy_coeff: 0.009999999999999998
          kl: 0.011431628842892236
          policy_loss: -0.06120569325155682
          total_loss: -0.006748779780334896
          vf_explained_var: 0.56852126121521
          vf_loss: 0.06560034525270263
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iterations_since_restore: 228
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,228,4599.59,228000,0.02,7,-5,444.2


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-06_15-27-31
  done: false
  episode_len_mean: 445.69
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.1
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 520
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8964812848303052
          entropy_coeff: 0.009999999999999998
          kl: 0.009027266759228238
          policy_loss: -0.09796808461348215
          total_loss: 0.13871586016482776
          vf_explained_var: 0.07882210612297058
          vf_loss: 0.25179277724172505
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations_since_restore: 229
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,229,4618.13,229000,0.1,7,-5,445.69


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-06_15-27-50
  done: false
  episode_len_mean: 445.15
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.09
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 522
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.5395117786195542
          entropy_coeff: 0.009999999999999998
          kl: 0.022451010468919227
          policy_loss: 0.11256813560095098
          total_loss: 0.16372129174156322
          vf_explained_var: 0.6816317439079285
          vf_loss: 0.056958358962502745
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations_since_restore: 230
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,230,4637.31,230000,0.09,7,-5,445.15


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-06_15-28-10
  done: false
  episode_len_mean: 444.48
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.16
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 525
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8977167725563049
          entropy_coeff: 0.009999999999999998
          kl: 0.012457062747378907
          policy_loss: -0.03946738690137863
          total_loss: -0.033497603899902764
          vf_explained_var: 0.4780016243457794
          vf_loss: 0.016965429981549582
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iterations_since_restore: 231
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,231,4656.99,231000,0.16,7,-5,444.48


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-06_15-28-28
  done: false
  episode_len_mean: 444.47
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.19
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 527
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7688631825976902
          entropy_coeff: 0.009999999999999998
          kl: 0.009966281031854863
          policy_loss: -0.07617586743500497
          total_loss: -0.010966309077209897
          vf_explained_var: -0.4258210062980652
          vf_loss: 0.0765125673600576
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations_since_restore: 232
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,232,4675.64,232000,0.19,7,-5,444.47


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-06_15-28-48
  done: false
  episode_len_mean: 443.85
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.21
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 529
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7683011015256247
          entropy_coeff: 0.009999999999999998
          kl: 0.007648494795893216
          policy_loss: -0.13719354586468802
          total_loss: -0.09991046736637751
          vf_explained_var: 0.6352459192276001
          vf_loss: 0.05006552324112919
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  iterations_since_restore: 233
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,233,4695.36,233000,0.21,7,-5,443.85


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-06_15-29-08
  done: false
  episode_len_mean: 441.26
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.34
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 532
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.437370389699936
          entropy_coeff: 0.009999999999999998
          kl: 0.010741267473591323
          policy_loss: 0.03956619302431742
          total_loss: 0.10127337492174572
          vf_explained_var: 0.7236528992652893
          vf_loss: 0.06919871204429202
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  iterations_since_restore: 234
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,234,4715.1,234000,0.34,7,-4,441.26


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-06_15-29-29
  done: false
  episode_len_mean: 439.68
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.35
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 534
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7557645744747585
          entropy_coeff: 0.009999999999999998
          kl: 0.013875800858523689
          policy_loss: -0.02190773296687338
          total_loss: 0.15093680077956784
          vf_explained_var: 0.8038637638092041
          vf_loss: 0.1815116409626272
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  iterations_since_restore: 235
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,235,4735.83,235000,0.35,7,-4,439.68


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-06_15-29-49
  done: false
  episode_len_mean: 437.2
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.47
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 537
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7886817706955804
          entropy_coeff: 0.009999999999999998
          kl: 0.011409410396695168
          policy_loss: -0.052583065463436975
          total_loss: 0.12441169809963969
          vf_explained_var: 0.8590144515037537
          vf_loss: 0.18757131761974757
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  iterations_since_restore: 236
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,236,4755.85,236000,0.47,7,-4,437.2




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-06_15-30-27
  done: false
  episode_len_mean: 435.22
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.5
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 540
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.48690531651179
          entropy_coeff: 0.009999999999999998
          kl: 0.024250310375824095
          policy_loss: 0.003066879345311059
          total_loss: 0.6931225150409672
          vf_explained_var: 0.5861101150512695
          vf_loss: 0.689386957221561
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  iterations_since_restore: 237
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,237,4794.26,237000,0.5,7,-4,435.22


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-06_15-30-51
  done: false
  episode_len_mean: 434.53
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.53
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 542
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7484933972358703
          entropy_coeff: 0.009999999999999998
          kl: 0.009413070882391135
          policy_loss: -0.004561316221952438
          total_loss: 0.08640915287865533
          vf_explained_var: 0.1311817765235901
          vf_loss: 0.0994086522815956
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
  iterations_since_restore: 238
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,238,4818.12,238000,0.53,7,-4,434.53


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-06_15-31-13
  done: false
  episode_len_mean: 434.26
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.57
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 545
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6625593993398877
          entropy_coeff: 0.009999999999999998
          kl: 0.008414328859286217
          policy_loss: -0.04530797716644075
          total_loss: 0.2006429712391562
          vf_explained_var: 0.7659650444984436
          vf_loss: 0.25448966208431456
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  iterations_since_restore: 239
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,239,4840.27,239000,0.57,7,-4,434.26




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-06_15-32-15
  done: false
  episode_len_mean: 431.6
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.69
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 548
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7685914701885648
          entropy_coeff: 0.009999999999999998
          kl: 0.004475557801943333
          policy_loss: 0.03126644910209709
          total_loss: 0.1572801137963931
          vf_explained_var: 0.751720666885376
          vf_loss: 0.13939819203482734
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 240
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,240,4902.02,240000,0.69,7,-4,431.6




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-06_15-33-01
  done: false
  episode_len_mean: 431.1
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.81
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 550
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6149441440900167
          entropy_coeff: 0.009999999999999998
          kl: 0.014815183706722256
          policy_loss: -0.10221825299991502
          total_loss: 0.08715670191579394
          vf_explained_var: 0.7223109602928162
          vf_loss: 0.1984050799989038
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  iterations_since_restore: 241
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,241,4947.6,241000,0.81,7,-4,431.1


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-06_15-33-24
  done: false
  episode_len_mean: 429.36
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.91
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 553
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5132343848546346
          entropy_coeff: 0.009999999999999998
          kl: 0.01075688963399928
          policy_loss: -0.10354930924044715
          total_loss: -0.013398862712913089
          vf_explained_var: 0.8518863916397095
          vf_loss: 0.10011365010092656
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iterations_since_restore: 242
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,242,4971.1,242000,0.91,7,-4,429.36


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-06_15-33-47
  done: false
  episode_len_mean: 427.62
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 0.98
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 556
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6422847867012025
          entropy_coeff: 0.009999999999999998
          kl: 0.011893741107332505
          policy_loss: -0.11582093615498808
          total_loss: 0.058503003263225155
          vf_explained_var: 0.6608307361602783
          vf_loss: 0.18503134710093339
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  iterations_since_restore: 243
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,243,4993.56,243000,0.98,7,-4,427.62


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-06_15-34-11
  done: false
  episode_len_mean: 424.86
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.06
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 559
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5113967961735195
          entropy_coeff: 0.009999999999999998
          kl: 0.009964033848671821
          policy_loss: 0.030517485075526766
          total_loss: 0.3287543192505836
          vf_explained_var: 0.5442683696746826
          vf_loss: 0.3085626669228077
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  iterations_since_restore: 244
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,244,5018.32,244000,1.06,7,-4,424.86


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-06_15-34-34
  done: false
  episode_len_mean: 423.4
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.16
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 562
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7282441443867154
          entropy_coeff: 0.009999999999999998
          kl: 0.010214353663393475
          policy_loss: -0.06314913953344027
          total_loss: 0.05335738737550047
          vf_explained_var: 0.7637475728988647
          vf_loss: 0.12888053953647613
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
  iterations_since_restore: 245
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,245,5041.49,245000,1.16,7,-4,423.4




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-06_15-35-45
  done: false
  episode_len_mean: 419.68
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.31
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 565
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5049924784236484
          entropy_coeff: 0.009999999999999998
          kl: 0.008124183866102195
          policy_loss: -0.12051109431518449
          total_loss: -0.02421238687303331
          vf_explained_var: 0.8612977862358093
          vf_loss: 0.10744461909764343
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  iterations_since_restore: 246
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,246,5112.01,246000,1.31,7,-4,419.68


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-06_15-36-07
  done: false
  episode_len_mean: 419.96
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.43
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 567
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4634953949186538
          entropy_coeff: 0.009999999999999998
          kl: 0.010679612042098845
          policy_loss: -0.116810149865018
          total_loss: 0.05095439379413923
          vf_explained_var: 0.7310662865638733
          vf_loss: 0.17726749084475968
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
  iterations_since_restore: 247
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,247,5134.22,247000,1.43,7,-4,419.96


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-06_15-36-29
  done: false
  episode_len_mean: 420.99
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.5
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 570
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7805629266632927
          entropy_coeff: 0.009999999999999998
          kl: 0.01048383044334642
          policy_loss: -0.03924378504355749
          total_loss: 0.2613405943744712
          vf_explained_var: 0.6309980750083923
          vf_loss: 0.31335209243827394
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  iterations_since_restore: 248
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,248,5155.47,248000,1.5,7,-4,420.99


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-06_15-36-52
  done: false
  episode_len_mean: 416.92
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.62
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 573
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5770594875017803
          entropy_coeff: 0.009999999999999998
          kl: 0.012832997553820589
          policy_loss: -0.11697027534246444
          total_loss: 0.17863952798975838
          vf_explained_var: 0.8637399673461914
          vf_loss: 0.30521359625789857
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  iterations_since_restore: 249
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,249,5178.63,249000,1.62,7,-4,416.92


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-06_15-37-15
  done: false
  episode_len_mean: 414.59
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.65
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 576
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6143743912378947
          entropy_coeff: 0.009999999999999998
          kl: 0.023034766672452692
          policy_loss: 0.0020492202291886013
          total_loss: 0.42461539266837967
          vf_explained_var: 0.6126837134361267
          vf_loss: 0.4276407450437546
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations_since_restore: 250
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,250,5202.06,250000,1.65,7,-4,414.59


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-06_15-37-39
  done: false
  episode_len_mean: 410.87
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.75
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 579
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.4955595956908332
          entropy_coeff: 0.009999999999999998
          kl: 0.010242087403930553
          policy_loss: -0.04627964571118355
          total_loss: 0.16329300676782926
          vf_explained_var: 0.6443601250648499
          vf_loss: 0.21714561904470125
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
  iterations_since_restore: 251
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,251,5225.68,251000,1.75,7,-4,410.87


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-06_15-38-02
  done: false
  episode_len_mean: 411.74
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.78
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 581
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.6414585365189447
          entropy_coeff: 0.009999999999999998
          kl: 0.01338993012471599
          policy_loss: 0.034759603854682714
          total_loss: 0.3785180561658409
          vf_explained_var: 0.8030505776405334
          vf_loss: 0.35052140195750525
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  iterations_since_restore: 252
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,252,5248.49,252000,1.78,7,-4,411.74


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-06_15-38-25
  done: false
  episode_len_mean: 410.88
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.91
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 584
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.6014987296528287
          entropy_coeff: 0.009999999999999998
          kl: 0.013492394645816337
          policy_loss: -0.1626103507147895
          total_loss: -0.059310869582825235
          vf_explained_var: 0.883992612361908
          vf_loss: 0.10958897550072935
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
  iterations_since_restore: 253
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,253,5271.34,253000,1.91,7,-4,410.88


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-06_15-38-44
  done: false
  episode_len_mean: 409.05
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.91
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 587
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.4944244980812074
          entropy_coeff: 0.009999999999999998
          kl: 0.011470902839062823
          policy_loss: -0.03820277262065146
          total_loss: 0.24129727698034711
          vf_explained_var: 0.302024781703949
          vf_loss: 0.2861759235461553
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
  iterations_since_restore: 254
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,254,5290.69,254000,1.91,7,-4,409.05


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-06_15-39-05
  done: false
  episode_len_mean: 407.27
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 1.98
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 589
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.789976261721717
          entropy_coeff: 0.009999999999999998
          kl: 0.010046879028074236
          policy_loss: -0.05773945202430089
          total_loss: 0.11759109364615547
          vf_explained_var: 0.7757536768913269
          vf_loss: 0.18598838568561607
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  iterations_since_restore: 255
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,255,5311.34,255000,1.98,7,-4,407.27


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-06_15-39-25
  done: false
  episode_len_mean: 403.51
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 2.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 592
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.6075809412532382
          entropy_coeff: 0.009999999999999998
          kl: 0.0076357351254213454
          policy_loss: -0.04091589537759622
          total_loss: 0.2059994500544336
          vf_explained_var: 0.6837142705917358
          vf_loss: 0.25748722325596546
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  iterations_since_restore: 256
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,256,5331.4,256000,2.07,7,-4,403.51




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-06_15-40-04
  done: false
  episode_len_mean: 400.85
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 2.13
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 594
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.5827876753277248
          entropy_coeff: 0.009999999999999998
          kl: 0.009288672010151839
          policy_loss: 0.033836421370506284
          total_loss: 0.12317054126825598
          vf_explained_var: 0.9049570560455322
          vf_loss: 0.0984666043271621
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
  iterations_since_restore: 257
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,257,5370.64,257000,2.13,7,-4,400.85


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-06_15-40-25
  done: false
  episode_len_mean: 399.97
  episode_media: {}
  episode_reward_max: 7.0
  episode_reward_mean: 2.14
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 597
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.4691212587886386
          entropy_coeff: 0.009999999999999998
          kl: 0.006979093360251751
          policy_loss: -0.021245882246229385
          total_loss: 0.1254440998037656
          vf_explained_var: 0.7898406982421875
          vf_loss: 0.15635057468381192
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000
  iterations_since_restore: 258
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,258,5391.97,258000,2.14,7,-4,399.97




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-06_15-41-22
  done: false
  episode_len_mean: 393.93
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.31
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 600
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.568004768424564
          entropy_coeff: 0.009999999999999998
          kl: 0.013379701849715374
          policy_loss: -0.07306657623913553
          total_loss: 0.5226974303523699
          vf_explained_var: 0.6328169703483582
          vf_loss: 0.601799797018369
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000
  iterations_since_restore: 259
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,259,5448.58,259000,2.31,10,-4,393.93


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-06_15-41-43
  done: false
  episode_len_mean: 389.31
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.44
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 603
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.482285287645128
          entropy_coeff: 0.009999999999999998
          kl: 0.012171741590911446
          policy_loss: -0.034854304956065285
          total_loss: 0.1221585122247537
          vf_explained_var: 0.8744493126869202
          vf_loss: 0.16306212455448177
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 260
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,260,5469.2,260000,2.44,10,-4,389.31


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-06_15-42-02
  done: false
  episode_len_mean: 387.28
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.47
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 605
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.5889564871788024
          entropy_coeff: 0.009999999999999998
          kl: 0.012682061630487872
          policy_loss: 0.009778976109292773
          total_loss: 0.18385834784971344
          vf_explained_var: 0.667473554611206
          vf_loss: 0.18082753982808855
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000
  iterations_since_restore: 261
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,261,5488.73,261000,2.47,10,-4,387.28


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-06_15-42-24
  done: false
  episode_len_mean: 384.77
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.6
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 608
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.4819390853246053
          entropy_coeff: 0.009999999999999998
          kl: 0.009558608620285616
          policy_loss: -0.10634719894991981
          total_loss: 0.16439855396747588
          vf_explained_var: 0.8179656863212585
          vf_loss: 0.27867517471313474
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000
  iterations_since_restore: 262
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,262,5510.58,262000,2.6,10,-4,384.77




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-06_15-44-27
  done: false
  episode_len_mean: 374.6
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.82
  episode_reward_min: -4.0
  episodes_this_iter: 5
  episodes_total: 613
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.4265837497181362
          entropy_coeff: 0.009999999999999998
          kl: 0.012667885630335857
          policy_loss: 0.11476906140645345
          total_loss: 0.4723599268330468
          vf_explained_var: 0.8668168783187866
          vf_loss: 0.36272552775012123
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000
  iterations_since_restore: 263
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,263,5633.97,263000,2.82,10,-4,374.6




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-06_15-45-25
  done: false
  episode_len_mean: 368.31
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.8
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 616
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.410341027047899
          entropy_coeff: 0.009999999999999998
          kl: 0.011673724232452937
          policy_loss: -0.014420795854594973
          total_loss: 0.24210533127188683
          vf_explained_var: 0.8677324652671814
          vf_loss: 0.2622149690157837
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  iterations_since_restore: 264
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,264,5691.2,264000,2.8,10,-4,368.31




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-06_15-46-26
  done: false
  episode_len_mean: 363.3
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 2.9
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 619
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.6519897447692022
          entropy_coeff: 0.009999999999999998
          kl: 0.007945928351349984
          policy_loss: -0.10040852112902535
          total_loss: 0.22031225967738363
          vf_explained_var: 0.8296580910682678
          vf_loss: 0.33151315504478085
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
  iterations_since_restore: 265
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,265,5752.55,265000,2.9,10,-4,363.3




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-06_15-47-27
  done: false
  episode_len_mean: 358.53
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.05
  episode_reward_min: -4.0
  episodes_this_iter: 4
  episodes_total: 623
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.5323172648747763
          entropy_coeff: 0.009999999999999998
          kl: 0.008958193061564214
          policy_loss: 0.021396387117500935
          total_loss: 0.2012006935560041
          vf_explained_var: 0.846246063709259
          vf_loss: 0.1886702968014611
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
  iterations_since_restore: 266
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,266,5813.23,266000,3.05,10,-4,358.53




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-06_15-48-17
  done: false
  episode_len_mean: 358.7
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.1
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 625
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.6427098910013835
          entropy_coeff: 0.009999999999999998
          kl: 0.010126506831626298
          policy_loss: 0.048211582915650475
          total_loss: 0.256659109890461
          vf_explained_var: 0.5474787950515747
          vf_loss: 0.21757530776990786
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000
  iterations_since_restore: 267
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,267,5863.39,267000,3.1,10,-4,358.7


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-06_15-48-36
  done: false
  episode_len_mean: 356.5
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.16
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 627
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.3830525914827982
          entropy_coeff: 0.009999999999999998
          kl: 0.005963053805006741
          policy_loss: -0.13296142025954194
          total_loss: -0.0861344626173377
          vf_explained_var: 0.4489865303039551
          vf_loss: 0.05635923591131965
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  iterations_since_restore: 268
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,268,5882.26,268000,3.16,10,-4,356.5


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-06_15-48-56
  done: false
  episode_len_mean: 357.7
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.07
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 630
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.4340746402740479
          entropy_coeff: 0.009999999999999998
          kl: 0.006884376636337998
          policy_loss: -0.05769959294961558
          total_loss: 0.22116997755236095
          vf_explained_var: 0.8122016787528992
          vf_loss: 0.28824797063652013
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000
  iterations_since_restore: 269
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,269,5902.38,269000,3.07,10,-4,357.7


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-06_15-49-12
  done: false
  episode_len_mean: 360.44
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.02
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 632
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.394374680519104
          entropy_coeff: 0.009999999999999998
          kl: 0.010796308837802832
          policy_loss: 0.07359701262580023
          total_loss: 0.1836264110273785
          vf_explained_var: 0.6089895963668823
          vf_loss: 0.11619102518177694
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  iterations_since_restore: 270
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,270,5918.52,270000,3.02,10,-4,360.44


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-06_15-49-33
  done: false
  episode_len_mean: 359.88
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.07
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 634
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.634030709001753
          entropy_coeff: 0.009999999999999998
          kl: 0.006378937451827345
          policy_loss: -0.020132802261246575
          total_loss: 0.2970927477710777
          vf_explained_var: 0.5888665914535522
          vf_loss: 0.3289678368303511
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  iterations_since_restore: 271
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,271,5939.66,271000,3.07,10,-4,359.88


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-06_15-49-55
  done: false
  episode_len_mean: 359.79
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.05
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 637
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.7194752083884346
          entropy_coeff: 0.009999999999999998
          kl: 0.00627906492447191
          policy_loss: 0.024589606415894298
          total_loss: 0.187931258065833
          vf_explained_var: 0.7805097699165344
          vf_loss: 0.17601037093748648
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  iterations_since_restore: 272
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,272,5961.28,272000,3.05,10,-4,359.79


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-06_15-50-18
  done: false
  episode_len_mean: 359.25
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.11
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 640
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.5600853403409323
          entropy_coeff: 0.009999999999999998
          kl: 0.00975085980220377
          policy_loss: 0.04476670856691069
          total_loss: 0.26269305013120176
          vf_explained_var: 0.8297775387763977
          vf_loss: 0.22649864852428436
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
  iterations_since_restore: 273
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,273,5984.54,273000,3.11,10,-4,359.25




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-06_15-51-21
  done: false
  episode_len_mean: 355.99
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.23
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 643
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.5256856189833747
          entropy_coeff: 0.009999999999999998
          kl: 0.009733604981716423
          policy_loss: -0.14409605372283193
          total_loss: 0.07864438547856278
          vf_explained_var: 0.7115103602409363
          vf_loss: 0.23098118597020706
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  iterations_since_restore: 274
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,274,6047.16,274000,3.23,10,-4,355.99




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-06_15-52-50
  done: false
  episode_len_mean: 351.6
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.29
  episode_reward_min: -4.0
  episodes_this_iter: 4
  episodes_total: 647
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.624490745862325
          entropy_coeff: 0.009999999999999998
          kl: 0.011854845908697195
          policy_loss: 0.062030095193121165
          total_loss: 0.23066684442261856
          vf_explained_var: 0.9107239842414856
          vf_loss: 0.1763365244285928
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
  iterations_since_restore: 275
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,275,6136.38,275000,3.29,10,-4,351.6


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-06_15-53-10
  done: false
  episode_len_mean: 355.54
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.26
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 649
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.528537486659156
          entropy_coeff: 0.009999999999999998
          kl: 0.008848168754145489
          policy_loss: -0.0019972315678993863
          total_loss: 0.13412226852443482
          vf_explained_var: 0.8560252785682678
          vf_loss: 0.14502700029147997
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
  iterations_since_restore: 276
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,276,6156.6,276000,3.26,10,-4,355.54


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-06_15-53-31
  done: false
  episode_len_mean: 355.39
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.16
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 652
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.3957236303223504
          entropy_coeff: 0.009999999999999998
          kl: 0.009258410773163916
          policy_loss: -0.023570441951354344
          total_loss: 0.13767235146628487
          vf_explained_var: 0.8353255391120911
          vf_loss: 0.16852644756436347
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
  iterations_since_restore: 277
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,277,6177.47,277000,3.16,10,-4,355.39


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-06_15-53-51
  done: false
  episode_len_mean: 356.62
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.19
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 654
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.5167070653703478
          entropy_coeff: 0.009999999999999998
          kl: 0.010807097396145604
          policy_loss: 0.00459175631403923
          total_loss: 0.19467181091507277
          vf_explained_var: 0.8160535097122192
          vf_loss: 0.19745722694529427
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000
  iterations_since_restore: 278
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,278,6197.19,278000,3.19,10,-4,356.62


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-06_15-54-14
  done: false
  episode_len_mean: 357.1
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.28
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 657
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.5745687007904052
          entropy_coeff: 0.009999999999999998
          kl: 0.012101206972253328
          policy_loss: -0.13586597707536485
          total_loss: -0.0069079882775743805
          vf_explained_var: 0.8837577700614929
          vf_loss: 0.1359809694604741
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
  iterations_since_restore: 279
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,279,6219.96,279000,3.28,10,-4,357.1




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-06_15-55-33
  done: false
  episode_len_mean: 352.7
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.4
  episode_reward_min: -4.0
  episodes_this_iter: 4
  episodes_total: 661
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.4741780042648316
          entropy_coeff: 0.009999999999999998
          kl: 0.010072419594918086
          policy_loss: -0.1703796790705787
          total_loss: 0.06806797397633393
          vf_explained_var: 0.8001817464828491
          vf_loss: 0.24592910317911043
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 280
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,280,6298.79,280000,3.4,10,-4,352.7


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-06_15-55-55
  done: false
  episode_len_mean: 354.23
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.38
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 664
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.4143705368041992
          entropy_coeff: 0.009999999999999998
          kl: 0.011758093137970329
          policy_loss: 0.061229637430773844
          total_loss: 0.24058679615457854
          vf_explained_var: 0.8527283668518066
          vf_loss: 0.18502547736797068
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
  iterations_since_restore: 281
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,281,6321.31,281000,3.38,10,-4,354.23




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-06_15-56-57
  done: false
  episode_len_mean: 351.03
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.4
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 667
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.4574663546350268
          entropy_coeff: 0.009999999999999998
          kl: 0.006475855048009017
          policy_loss: -0.06720246287683646
          total_loss: 0.1080109816768931
          vf_explained_var: 0.869222104549408
          vf_loss: 0.1851202296713988
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000
  iterations_since_restore: 282
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,282,6382.92,282000,3.4,10,-4,351.03


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-06_15-57-21
  done: false
  episode_len_mean: 350.52
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.48
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 670
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.3819619099299112
          entropy_coeff: 0.009999999999999998
          kl: 0.007265701144803908
          policy_loss: -0.11043927189376619
          total_loss: 0.034486665783656965
          vf_explained_var: 0.8872184753417969
          vf_loss: 0.15350834735565716
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  iterations_since_restore: 283
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,283,6407.08,283000,3.48,10,-4,350.52


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-06_15-57-41
  done: false
  episode_len_mean: 350.76
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.5
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 673
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.327705935637156
          entropy_coeff: 0.009999999999999998
          kl: 0.01121060131859445
          policy_loss: -0.038133108533091015
          total_loss: 0.1822519310646587
          vf_explained_var: 0.7423457503318787
          vf_loss: 0.2255813534061114
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  iterations_since_restore: 284
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,284,6427.58,284000,3.5,10,-4,350.76




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-06_15-58-46
  done: false
  episode_len_mean: 349.18
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.59
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 676
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7208129882812502
          cur_lr: 5.000000000000001e-05
          entropy: 1.370302599006229
          entropy_coeff: 0.009999999999999998
          kl: 0.021748061639625386
          policy_loss: -0.004416384796301524
          total_loss: 0.34703451540941993
          vf_explained_var: 0.5058361291885376
          vf_loss: 0.349477647865812
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  iterations_since_restore: 285
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,285,6491.9,285000,3.59,10,-4,349.18


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-06_15-59-07
  done: false
  episode_len_mean: 350.72
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.58
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 679
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.5642086002561781
          entropy_coeff: 0.009999999999999998
          kl: 0.008496297681765854
          policy_loss: 0.011351447221305636
          total_loss: 0.15297594029042455
          vf_explained_var: 0.9022506475448608
          vf_loss: 0.14808021742436622
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  iterations_since_restore: 286
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,286,6512.91,286000,3.58,10,-4,350.72




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-06_16-00-03
  done: false
  episode_len_mean: 349.03
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.56
  episode_reward_min: -4.0
  episodes_this_iter: 2
  episodes_total: 681
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.4348193248112997
          entropy_coeff: 0.009999999999999998
          kl: 0.02017245132526989
          policy_loss: -0.010848510969016288
          total_loss: 0.4839755290912257
          vf_explained_var: 0.7992798686027527
          vf_loss: 0.48736138625277414
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000
  iterations_since_restore: 287
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,287,6568.81,287000,3.56,10,-4,349.03


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-06_16-00-23
  done: false
  episode_len_mean: 350.84
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.58
  episode_reward_min: -4.0
  episodes_this_iter: 3
  episodes_total: 684
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6218292236328118
          cur_lr: 5.000000000000001e-05
          entropy: 1.0346508151955074
          entropy_coeff: 0.009999999999999998
          kl: 0.0046258398056348285
          policy_loss: -0.1308536926905314
          total_loss: -0.009126826375722884
          vf_explained_var: 0.7644844055175781
          vf_loss: 0.12457105360097355
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000
  iterations_since_restore: 288
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,288,6588.71,288000,3.58,10,-4,350.84


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-06_16-00-38
  done: false
  episode_len_mean: 352.96
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.64
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 686
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.0668544054031373
          entropy_coeff: 0.009999999999999998
          kl: 0.007754034035629647
          policy_loss: -0.11402069545454449
          total_loss: 0.027680615045958094
          vf_explained_var: 0.7819294333457947
          vf_loss: 0.14608199667159674
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000
  iterations_since_restore: 289
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,289,6604.19,289000,3.64,10,-5,352.96




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-06_16-01-32
  done: false
  episode_len_mean: 351.79
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.56
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 689
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.2376967244678074
          entropy_coeff: 0.009999999999999998
          kl: 0.021187483656466395
          policy_loss: 0.12317179549071523
          total_loss: 0.38849691030465894
          vf_explained_var: 0.5116358399391174
          vf_loss: 0.26052084436847106
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  iterations_since_restore: 290
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,290,6657.97,290000,3.56,10,-10,351.79




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-06_16-02-33
  done: false
  episode_len_mean: 349.81
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.52
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 692
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3433722694714865
          entropy_coeff: 0.009999999999999998
          kl: 0.01037722179817939
          policy_loss: 0.07389433946874406
          total_loss: 0.31041123213039507
          vf_explained_var: 0.6135828495025635
          vf_loss: 0.23732805794311895
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
  iterations_since_restore: 291
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,291,6718.93,291000,3.52,10,-10,349.81




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-06_16-03-31
  done: false
  episode_len_mean: 346.63
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.57
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 695
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.2036692414018844
          entropy_coeff: 0.009999999999999998
          kl: 0.008226369288330204
          policy_loss: -0.3239123629199134
          total_loss: 0.10257713649835852
          vf_explained_var: 0.60862797498703
          vf_loss: 0.4285198698441188
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  iterations_since_restore: 292
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,292,6777.15,292000,3.57,10,-10,346.63


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-06_16-03-47
  done: false
  episode_len_mean: 350.26
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.33
  episode_reward_min: -19.0
  episodes_this_iter: 2
  episodes_total: 697
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.1119874960846372
          entropy_coeff: 0.009999999999999998
          kl: 0.009730506883105377
          policy_loss: -0.059902429083983105
          total_loss: 0.12491568449056811
          vf_explained_var: 0.44606316089630127
          vf_loss: 0.18410207620925373
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  iterations_since_restore: 293
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,293,6793.21,293000,3.33,10,-19,350.26




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-06_16-05-31
  done: false
  episode_len_mean: 344.67
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.45
  episode_reward_min: -19.0
  episodes_this_iter: 5
  episodes_total: 702
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.124357400337855
          entropy_coeff: 0.009999999999999998
          kl: 0.004028810953653463
          policy_loss: 0.18653010775645573
          total_loss: 0.2955810974041621
          vf_explained_var: 0.8182350993156433
          vf_loss: 0.11539403159792225
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000
  iterations_since_restore: 294
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,294,6896.76,294000,3.45,10,-19,344.67


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-06_16-05-50
  done: false
  episode_len_mean: 344.63
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.36
  episode_reward_min: -19.0
  episodes_this_iter: 2
  episodes_total: 704
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3055957516034444
          entropy_coeff: 0.009999999999999998
          kl: 0.011349736959017222
          policy_loss: -0.28223281113637816
          total_loss: -0.0009646870195865632
          vf_explained_var: 0.7192257642745972
          vf_loss: 0.2874213377220763
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000
  iterations_since_restore: 295
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,295,6915.48,295000,3.36,10,-19,344.63




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-06_16-06-47
  done: false
  episode_len_mean: 343.21
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.14
  episode_reward_min: -19.0
  episodes_this_iter: 3
  episodes_total: 707
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2408648755815295
          entropy_coeff: 0.009999999999999998
          kl: 0.015931016084490126
          policy_loss: -0.03347990976439582
          total_loss: 0.3129203280640973
          vf_explained_var: 0.4957929849624634
          vf_loss: 0.34911986506647535
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  iterations_since_restore: 296
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,296,6972.54,296000,3.14,10,-19,343.21




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-06_16-09-05
  done: false
  episode_len_mean: 335.98
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.33
  episode_reward_min: -19.0
  episodes_this_iter: 7
  episodes_total: 714
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2590063585175408
          entropy_coeff: 0.009999999999999998
          kl: 0.0135673984024952
          policy_loss: 0.06766288015577528
          total_loss: 0.49781326833698486
          vf_explained_var: 0.9314782619476318
          vf_loss: 0.4344889546434085
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000
  iterations_since_restore: 297
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,297,7110.97,297000,3.33,10,-19,335.98




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-06_16-09-49
  done: false
  episode_len_mean: 337.03
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.23
  episode_reward_min: -19.0
  episodes_this_iter: 2
  episodes_total: 716
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2157577481534747
          entropy_coeff: 0.009999999999999998
          kl: 0.024426871905718597
          policy_loss: 0.07369429336653815
          total_loss: 0.6207766611542966
          vf_explained_var: 0.6565805077552795
          vf_loss: 0.5443838660915693
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000
  iterations_since_restore: 298
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,298,7155.2,298000,3.23,10,-19,337.03




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-06_16-11-50
  done: false
  episode_len_mean: 327.18
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.34
  episode_reward_min: -19.0
  episodes_this_iter: 8
  episodes_total: 724
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.2551017264525095
          entropy_coeff: 0.009999999999999998
          kl: 0.00947064329585159
          policy_loss: -0.0698968936999639
          total_loss: 0.3935093881562352
          vf_explained_var: 0.6252464056015015
          vf_loss: 0.46731742877099247
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
  iterations_since_restore: 299
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,299,7276.13,299000,3.34,10,-19,327.18


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-06_16-12-05
  done: false
  episode_len_mean: 327.98
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.16
  episode_reward_min: -19.0
  episodes_this_iter: 1
  episodes_total: 725
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.4623382012049357
          entropy_coeff: 0.009999999999999998
          kl: 0.01688666004115053
          policy_loss: 0.10404444336891175
          total_loss: 0.4688803907897737
          vf_explained_var: 0.5862985253334045
          vf_loss: 0.36405398638712033
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 300
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,300,7290.96,300000,3.16,10,-19,327.98




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-06_16-13-20
  done: false
  episode_len_mean: 324.81
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.15
  episode_reward_min: -19.0
  episodes_this_iter: 4
  episodes_total: 729
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.4522547509935166
          entropy_coeff: 0.009999999999999998
          kl: 0.011868962232623126
          policy_loss: -0.07418458147181405
          total_loss: 0.24709044208543168
          vf_explained_var: 0.7495694756507874
          vf_loss: 0.32496976984871756
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000
  iterations_since_restore: 301
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,301,7365.64,301000,3.15,10,-19,324.81




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-06_16-17-29
  done: false
  episode_len_mean: 285.25
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.82
  episode_reward_min: -19.0
  episodes_this_iter: 13
  episodes_total: 742
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.1746571017636194
          entropy_coeff: 0.009999999999999998
          kl: 0.0164227700727474
          policy_loss: 0.0027149746815363566
          total_loss: 0.38558704290125106
          vf_explained_var: 0.8520147800445557
          vf_loss: 0.3796364893515905
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  iterations_since_restore: 302
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,302,7614.31,302000,3.82,10,-19,285.25




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-06_16-18-10
  done: false
  episode_len_mean: 287.03
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.68
  episode_reward_min: -19.0
  episodes_this_iter: 2
  episodes_total: 744
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.1017085777388678
          entropy_coeff: 0.009999999999999998
          kl: 0.0123465581379687
          policy_loss: -0.08640804919931623
          total_loss: 0.3559584463222159
          vf_explained_var: 0.7659948468208313
          vf_loss: 0.4421200793650415
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  iterations_since_restore: 303
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,303,7655.39,303000,3.68,10,-19,287.03




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-06_16-19-38
  done: false
  episode_len_mean: 277.74
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 3.78
  episode_reward_min: -19.0
  episodes_this_iter: 6
  episodes_total: 750
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.3838346163431803
          entropy_coeff: 0.009999999999999998
          kl: 0.01593318519785213
          policy_loss: 0.08578578109542528
          total_loss: 0.351652571807305
          vf_explained_var: 0.6141075491905212
          vf_loss: 0.26516962945461275
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  iterations_since_restore: 304
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,304,7744.22,304000,3.78,10,-19,277.74




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-06_16-22-29
  done: false
  episode_len_mean: 255.99
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.09
  episode_reward_min: -19.0
  episodes_this_iter: 9
  episodes_total: 759
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.349812462594774
          entropy_coeff: 0.009999999999999998
          kl: 0.01424433111538082
          policy_loss: -0.1157142589489619
          total_loss: 0.23719272495557864
          vf_explained_var: 0.5153036117553711
          vf_loss: 0.3534103030959765
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
  iterations_since_restore: 305
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,305,7915.21,305000,4.09,10,-19,255.99




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-06_16-24-01
  done: false
  episode_len_mean: 248.0
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.19
  episode_reward_min: -19.0
  episodes_this_iter: 5
  episodes_total: 764
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.1152304808298747
          entropy_coeff: 0.009999999999999998
          kl: 0.02613986302586117
          policy_loss: -0.037515283789899614
          total_loss: 1.317635363340378
          vf_explained_var: 0.8681626915931702
          vf_loss: 1.3424561301867166
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  iterations_since_restore: 306
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,306,8006.28,306000,4.19,10,-19,248




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-06_16-25-53
  done: false
  episode_len_mean: 237.08
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.18
  episode_reward_min: -19.0
  episodes_this_iter: 7
  episodes_total: 771
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 1.147027807103263
          entropy_coeff: 0.009999999999999998
          kl: 0.008544963948250049
          policy_loss: -0.02747060428890917
          total_loss: 0.44427492568890253
          vf_explained_var: 0.863278865814209
          vf_loss: 0.47152272661527
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  iterations_since_restore: 307
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,307,8118.59,307000,4.18,10,-19,237.08




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-06_16-28-16
  done: false
  episode_len_mean: 218.46
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.49
  episode_reward_min: -19.0
  episodes_this_iter: 8
  episodes_total: 779
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3684184074401855
          cur_lr: 5.000000000000001e-05
          entropy: 0.8930764648649427
          entropy_coeff: 0.009999999999999998
          kl: 0.02126606688661915
          policy_loss: 0.023411026762591467
          total_loss: 1.3072572569052379
          vf_explained_var: 0.8183909058570862
          vf_loss: 1.2636761128902436
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
  iterations_since_restore: 308
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,308,8261.22,308000,4.49,10,-19,218.46




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-06_16-29-06
  done: false
  episode_len_mean: 218.42
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.54
  episode_reward_min: -19.0
  episodes_this_iter: 2
  episodes_total: 781
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.052627611160279
          cur_lr: 5.000000000000001e-05
          entropy: 1.0933166576756372
          entropy_coeff: 0.009999999999999998
          kl: 0.00990755978021964
          policy_loss: 0.011152873188257218
          total_loss: 0.6494562087787522
          vf_explained_var: 0.6010357141494751
          vf_loss: 0.628899968167146
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
  iterations_since_restore: 309
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,309,8311.62,309000,4.54,10,-19,218.42




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-06_16-30-35
  done: false
  episode_len_mean: 203.71
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.92
  episode_reward_min: -19.0
  episodes_this_iter: 6
  episodes_total: 787
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.052627611160279
          cur_lr: 5.000000000000001e-05
          entropy: 1.0617945280339982
          entropy_coeff: 0.009999999999999998
          kl: 0.008038321174635875
          policy_loss: -0.0399888245595826
          total_loss: 0.649500594039758
          vf_explained_var: 0.8186778426170349
          vf_loss: 0.6836076882150438
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  iterations_since_restore: 310
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,310,8400.62,310000,4.92,10,-19,203.71




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-06_16-31-30
  done: false
  episode_len_mean: 202.18
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 4.92
  episode_reward_min: -19.0
  episodes_this_iter: 4
  episodes_total: 791
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.052627611160279
          cur_lr: 5.000000000000001e-05
          entropy: 1.5022722178035313
          entropy_coeff: 0.009999999999999998
          kl: 0.007743118452307495
          policy_loss: -0.11132819006840387
          total_loss: 0.37799795385864043
          vf_explained_var: 0.7590779066085815
          vf_loss: 0.4884551222125689
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
  iterations_since_restore: 311
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,311,8455.67,311000,4.92,10,-19,202.18




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-06_16-34-13
  done: false
  episode_len_mean: 181.46
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.39
  episode_reward_min: -19.0
  episodes_this_iter: 9
  episodes_total: 800
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.052627611160279
          cur_lr: 5.000000000000001e-05
          entropy: 1.0105137321684095
          entropy_coeff: 0.009999999999999998
          kl: 0.009796740423229582
          policy_loss: 0.045568224787712094
          total_loss: 0.4658300408886539
          vf_explained_var: 0.8574662208557129
          vf_loss: 0.4102578901582294
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  iterations_since_restore: 312
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,312,8618.78,312000,5.39,10,-19,181.46




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-06_16-37-00
  done: false
  episode_len_mean: 166.28
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 5.97
  episode_reward_min: -19.0
  episodes_this_iter: 9
  episodes_total: 809
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.052627611160279
          cur_lr: 5.000000000000001e-05
          entropy: 1.007694787449307
          entropy_coeff: 0.009999999999999998
          kl: 0.005750307756586142
          policy_loss: -0.16249897927045823
          total_loss: 0.3115932982828882
          vf_explained_var: 0.8894006609916687
          vf_loss: 0.47236598531405133
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000
  iterations_since_restore: 313
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,313,8785.26,313000,5.97,10,-19,166.28




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-06_16-38-29
  done: false
  episode_len_mean: 165.7
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.05
  episode_reward_min: -19.0
  episodes_this_iter: 5
  episodes_total: 814
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.052627611160279
          cur_lr: 5.000000000000001e-05
          entropy: 1.1746990892622207
          entropy_coeff: 0.009999999999999998
          kl: 0.0035366948914646594
          policy_loss: 0.048251047854622205
          total_loss: 0.41330663851565785
          vf_explained_var: 0.7081131935119629
          vf_loss: 0.3695430571834246
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000
  iterations_since_restore: 314
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,314,8874.07,314000,6.05,10,-19,165.7




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-06_16-40-17
  done: false
  episode_len_mean: 163.99
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.21
  episode_reward_min: -19.0
  episodes_this_iter: 7
  episodes_total: 821
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 0.9679742925696903
          entropy_coeff: 0.009999999999999998
          kl: 0.01621156422511625
          policy_loss: 0.008306838240888384
          total_loss: 0.6636629727151658
          vf_explained_var: 0.7957344055175781
          vf_loss: 0.648397723502583
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  iterations_since_restore: 315
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,315,8982.4,315000,6.21,10,-19,163.99




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-06_16-42-04
  done: false
  episode_len_mean: 153.8
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.72
  episode_reward_min: -16.0
  episodes_this_iter: 5
  episodes_total: 826
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 0.7479699671268463
          entropy_coeff: 0.009999999999999998
          kl: 0.005455730060836217
          policy_loss: 0.004626059118244383
          total_loss: 0.8459525034659439
          vf_explained_var: 0.878835916519165
          vf_loss: 0.8432068462173145
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  iterations_since_restore: 316
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,316,9089.57,316000,6.72,10,-16,153.8




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-06_16-43-08
  done: false
  episode_len_mean: 155.79
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.91
  episode_reward_min: -16.0
  episodes_this_iter: 4
  episodes_total: 830
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.1759015070067511
          entropy_coeff: 0.009999999999999998
          kl: 0.016531276832871238
          policy_loss: 0.06017685114509529
          total_loss: 0.9137505012874801
          vf_explained_var: 0.6707587242126465
          vf_loss: 0.8483664012617536
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
  iterations_since_restore: 317
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,317,9153.95,317000,6.91,10,-16,155.79




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-06_16-45-12
  done: false
  episode_len_mean: 163.19
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.82
  episode_reward_min: -16.0
  episodes_this_iter: 7
  episodes_total: 837
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 0.8717543734444513
          entropy_coeff: 0.009999999999999998
          kl: 0.004808365316232531
          policy_loss: 0.11154438575936688
          total_loss: 0.8458969420442979
          vf_explained_var: 0.4088207185268402
          vf_loss: 0.7381352003353338
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  iterations_since_restore: 318
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,318,9277.82,318000,6.82,10,-16,163.19




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-06_16-46-45
  done: false
  episode_len_mean: 166.55
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 6.77
  episode_reward_min: -16.0
  episodes_this_iter: 5
  episodes_total: 842
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 0.9827185193697612
          entropy_coeff: 0.009999999999999998
          kl: 0.010129956094601668
          policy_loss: -0.008066688146856096
          total_loss: 0.6088544910152753
          vf_explained_var: 0.9194133281707764
          vf_loss: 0.6215501078301006
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  iterations_since_restore: 319
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,319,9369.93,319000,6.77,10,-16,166.55




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-06_16-47-36
  done: false
  episode_len_mean: 161.91
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.17
  episode_reward_min: -11.0
  episodes_this_iter: 4
  episodes_total: 846
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.1120110266738468
          entropy_coeff: 0.009999999999999998
          kl: 0.014183605391355186
          policy_loss: 0.10728251760204634
          total_loss: 0.39157423608832886
          vf_explained_var: 0.5617256164550781
          vf_loss: 0.28813340990907615
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 320
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,320,9421.48,320000,7.17,10,-11,161.91




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-06_16-51-14
  done: false
  episode_len_mean: 158.57
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.31
  episode_reward_min: -11.0
  episodes_this_iter: 11
  episodes_total: 857
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 0.6677364607652029
          entropy_coeff: 0.009999999999999998
          kl: 0.007782462113953632
          policy_loss: -0.11224654217561086
          total_loss: 0.6394668887058894
          vf_explained_var: 0.92441725730896
          vf_loss: 0.7543971757094066
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  iterations_since_restore: 321
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,321,9639.68,321000,7.31,10,-11,158.57




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-06_16-53-40
  done: false
  episode_len_mean: 153.67
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.7
  episode_reward_min: -11.0
  episodes_this_iter: 8
  episodes_total: 865
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 0.8259361922740937
          entropy_coeff: 0.009999999999999998
          kl: 0.004877089477808422
          policy_loss: -0.1495206269952986
          total_loss: 0.17811297666695383
          vf_explained_var: 0.745003342628479
          vf_loss: 0.3333902570936415
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
  iterations_since_restore: 322
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,322,9784.99,322000,7.7,10,-11,153.67




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-06_16-54-13
  done: false
  episode_len_mean: 159.79
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 7.65
  episode_reward_min: -11.0
  episodes_this_iter: 2
  episodes_total: 867
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 0.7967734201086892
          entropy_coeff: 0.009999999999999998
          kl: 0.005533107912645777
          policy_loss: 0.012023859988484117
          total_loss: 0.13891808005670706
          vf_explained_var: -0.011846128851175308
          vf_loss: 0.13344227638509537
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000
  iterations_since_restore: 323
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,323,9818.18,323000,7.65,10,-11,159.79




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-06_17-01-11
  done: false
  episode_len_mean: 136.66
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.27
  episode_reward_min: -10.0
  episodes_this_iter: 23
  episodes_total: 890
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 0.4853212998972999
          entropy_coeff: 0.009999999999999998
          kl: 0.004297921952065634
          policy_loss: -0.06836126786139277
          total_loss: 0.14489054195582868
          vf_explained_var: 0.97083580493927
          vf_loss: 0.21700226941870318
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
  iterations_since_restore: 324
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,324,10236.7,324000,8.27,10,-10,136.66




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-06_17-04-12
  done: false
  episode_len_mean: 130.36
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.6
  episode_reward_min: -5.0
  episodes_this_iter: 10
  episodes_total: 900
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 1.0977517435948054
          entropy_coeff: 0.009999999999999998
          kl: 0.030529697242377217
          policy_loss: 0.022483853995800017
          total_loss: 0.8173236153191991
          vf_explained_var: 0.6591564416885376
          vf_loss: 0.8019006436069807
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
  iterations_since_restore: 325
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,325,10417.1,325000,8.6,10,-5,130.36




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-06_17-07-50
  done: false
  episode_len_mean: 120.7
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.82
  episode_reward_min: -4.0
  episodes_this_iter: 11
  episodes_total: 911
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 0.6031590074300766
          entropy_coeff: 0.009999999999999998
          kl: 0.009893416459124754
          policy_loss: -0.11557549950149325
          total_loss: 0.00806232632862197
          vf_explained_var: 0.3130629062652588
          vf_loss: 0.127765588917666
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  iterations_since_restore: 326
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,326,10635,326000,8.82,10,-4,120.7




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-06_17-10-33
  done: false
  episode_len_mean: 116.47
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.02
  episode_reward_min: -4.0
  episodes_this_iter: 9
  episodes_total: 920
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 0.7476299623648326
          entropy_coeff: 0.009999999999999998
          kl: 0.008091808413748463
          policy_loss: -0.05792140364646912
          total_loss: 0.19216735776927735
          vf_explained_var: 0.7119265794754028
          vf_loss: 0.2560079181773795
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000
  iterations_since_restore: 327
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,327,10798.1,327000,9.02,10,-4,116.47


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-06_17-10-46
  done: false
  episode_len_mean: 123.62
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 8.92
  episode_reward_min: -4.0
  episodes_this_iter: 1
  episodes_total: 921
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 0.9604849835236867
          entropy_coeff: 0.009999999999999998
          kl: 0.014525803638165947
          policy_loss: 0.06551868865887324
          total_loss: 0.20870353562964333
          vf_explained_var: 0.4842485189437866
          vf_loss: 0.14999443996283743
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  iterations_since_restore: 328
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,328,10811.2,328000,8.92,10,-4,123.62




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-06_17-13-09
  done: false
  episode_len_mean: 119.72
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.04
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 930
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 1.6225079483456082
          entropy_coeff: 0.009999999999999998
          kl: 0.010421413084264506
          policy_loss: 0.20220056399703026
          total_loss: 0.4370239515271452
          vf_explained_var: 0.8751910924911499
          vf_loss: 0.2490430316577355
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
  iterations_since_restore: 329
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,329,10954.4,329000,9.04,10,-2,119.72




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-06_17-15-14
  done: false
  episode_len_mean: 113.01
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.14
  episode_reward_min: -2.0
  episodes_this_iter: 6
  episodes_total: 936
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 1.1246245145797729
          entropy_coeff: 0.009999999999999998
          kl: 0.014237908723417587
          policy_loss: -0.04249945547845629
          total_loss: 0.31947559813658394
          vf_explained_var: 0.6494008302688599
          vf_loss: 0.37048144878612627
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  iterations_since_restore: 330
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,330,11079.4,330000,9.14,10,-2,113.01




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-06_17-17-39
  done: false
  episode_len_mean: 110.68
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.18
  episode_reward_min: -2.0
  episodes_this_iter: 9
  episodes_total: 945
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 1.218409393231074
          entropy_coeff: 0.009999999999999998
          kl: 0.01869466506145599
          policy_loss: 0.14817738127377297
          total_loss: 0.32394151012930605
          vf_explained_var: 0.7300186157226562
          vf_loss: 0.18435073635644383
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000
  iterations_since_restore: 331
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,331,11224.3,331000,9.18,10,-2,110.68




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-06_17-19-06
  done: false
  episode_len_mean: 110.81
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.17
  episode_reward_min: -2.0
  episodes_this_iter: 4
  episodes_total: 949
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 0.8892097347312503
          entropy_coeff: 0.009999999999999998
          kl: 0.012928404233480961
          policy_loss: -0.07825385919875569
          total_loss: 0.10572686253322496
          vf_explained_var: 0.0851082131266594
          vf_loss: 0.1903849585706161
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
  iterations_since_restore: 332
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,332,11310.6,332000,9.17,10,-2,110.81




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-06_17-22-46
  done: false
  episode_len_mean: 112.84
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.11
  episode_reward_min: -2.0
  episodes_this_iter: 12
  episodes_total: 961
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 0.9677966800000932
          entropy_coeff: 0.009999999999999998
          kl: 0.01534076403112243
          policy_loss: -0.04250385347339842
          total_loss: 0.5357241669462787
          vf_explained_var: 0.932270884513855
          vf_loss: 0.5849539091189703
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  iterations_since_restore: 333
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,333,11531.4,333000,9.11,10,-2,112.84




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-06_17-25-12
  done: false
  episode_len_mean: 109.34
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.09
  episode_reward_min: -2.0
  episodes_this_iter: 8
  episodes_total: 969
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 1.1844646732012432
          entropy_coeff: 0.009999999999999998
          kl: 0.023198392397958994
          policy_loss: -0.023648806744151644
          total_loss: 0.40907857161429195
          vf_explained_var: 0.6867461800575256
          vf_loss: 0.4401078663766384
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  iterations_since_restore: 334
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,334,11677.2,334000,9.09,10,-2,109.34




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-06_17-26-20
  done: false
  episode_len_mean: 117.58
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.06
  episode_reward_min: -2.0
  episodes_this_iter: 4
  episodes_total: 973
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.1975420415401459
          entropy_coeff: 0.009999999999999998
          kl: 0.00813785930497678
          policy_loss: -0.18651367533538077
          total_loss: -0.08986349710159831
          vf_explained_var: 0.22787073254585266
          vf_loss: 0.10627659489659386
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  iterations_since_restore: 335
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,335,11744.9,335000,9.06,10,-2,117.58




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-06_17-32-45
  done: false
  episode_len_mean: 113.05
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.16
  episode_reward_min: -2.0
  episodes_this_iter: 20
  episodes_total: 993
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 0.41970283256636726
          entropy_coeff: 0.009999999999999998
          kl: 0.005749512251039748
          policy_loss: -0.1588339311381181
          total_loss: 0.05428872087763415
          vf_explained_var: 0.9717044234275818
          vf_loss: 0.21566007725066608
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  iterations_since_restore: 336
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,336,12129.6,336000,9.16,10,-2,113.05




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-06_17-36-41
  done: false
  episode_len_mean: 117.69
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.09
  episode_reward_min: -2.0
  episodes_this_iter: 12
  episodes_total: 1005
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 0.912316173977322
          entropy_coeff: 0.009999999999999998
          kl: 0.008736185417021562
          policy_loss: -0.20199802768313221
          total_loss: -0.054871047867669
          vf_explained_var: 0.659887433052063
          vf_loss: 0.15372843398816055
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  iterations_since_restore: 337
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,337,12366.1,337000,9.09,10,-2,117.69




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-06_17-41-55
  done: false
  episode_len_mean: 98.71
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.28
  episode_reward_min: 1.0
  episodes_this_iter: 17
  episodes_total: 1022
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 0.8015269279479981
          entropy_coeff: 0.009999999999999998
          kl: 0.0120219033577261
          policy_loss: 0.02231642338964674
          total_loss: 0.1363212063908577
          vf_explained_var: 0.9359458088874817
          vf_loss: 0.11854992450939285
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  iterations_since_restore: 338
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,338,12680.5,338000,9.28,10,1,98.71




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-06_17-45-15
  done: false
  episode_len_mean: 96.34
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.33
  episode_reward_min: 2.0
  episodes_this_iter: 10
  episodes_total: 1032
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.0080890549553765
          entropy_coeff: 0.009999999999999998
          kl: 0.013269819887774916
          policy_loss: -0.09986715664466222
          total_loss: 0.07391123258405262
          vf_explained_var: 0.9836329817771912
          vf_loss: 0.18002894181344245
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  iterations_since_restore: 339
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,339,12880.2,339000,9.33,10,2,96.34




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-06_17-50-45
  done: false
  episode_len_mean: 87.92
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.44
  episode_reward_min: 2.0
  episodes_this_iter: 17
  episodes_total: 1049
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 0.8469805538654327
          entropy_coeff: 0.009999999999999998
          kl: 0.019078154780535685
          policy_loss: 0.0028832776678933037
          total_loss: 0.3131580679780907
          vf_explained_var: 0.6225775480270386
          vf_loss: 0.3132376682427194
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 340
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,340,13210,340000,9.44,10,2,87.92




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-06_17-59-23
  done: false
  episode_len_mean: 58.96
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.72
  episode_reward_min: 2.0
  episodes_this_iter: 27
  episodes_total: 1076
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 0.2780158370733261
          entropy_coeff: 0.009999999999999998
          kl: 0.006785574046519906
          policy_loss: 0.033510510209533904
          total_loss: 0.11070260893967417
          vf_explained_var: 0.9876610040664673
          vf_loss: 0.07801359650782413
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
  iterations_since_restore: 341
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,341,13728.2,341000,9.72,10,2,58.96




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-06_18-00-49
  done: false
  episode_len_mean: 58.99
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.71
  episode_reward_min: 2.0
  episodes_this_iter: 4
  episodes_total: 1080
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.2756630692217086
          entropy_coeff: 0.009999999999999998
          kl: 0.012521930630178129
          policy_loss: -0.08664227889643775
          total_loss: -0.031017057845989862
          vf_explained_var: 0.4983328580856323
          vf_loss: 0.0647673876852625
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  iterations_since_restore: 342
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,342,13814.4,342000,9.71,10,2,58.99




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-06_18-05-57
  done: false
  episode_len_mean: 65.27
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.72
  episode_reward_min: 2.0
  episodes_this_iter: 17
  episodes_total: 1097
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 0.4480274332894219
          entropy_coeff: 0.009999999999999998
          kl: 0.003846456927931068
          policy_loss: -0.06037242785096168
          total_loss: 0.11668261446886592
          vf_explained_var: 0.9851835370063782
          vf_loss: 0.18042503108994828
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  iterations_since_restore: 343
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,343,14122.2,343000,9.72,10,2,65.27




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-06_18-09-16
  done: false
  episode_len_mean: 67.29
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.7
  episode_reward_min: 0.0
  episodes_this_iter: 11
  episodes_total: 1108
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14432537890970706
          cur_lr: 5.000000000000001e-05
          entropy: 1.0958480642901527
          entropy_coeff: 0.009999999999999998
          kl: 0.027255506674970485
          policy_loss: -0.02708881100018819
          total_loss: 0.1262448936700821
          vf_explained_var: 0.8211331963539124
          vf_loss: 0.16035852486060725
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  iterations_since_restore: 344
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,344,14320.8,344000,9.7,10,0,67.29




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-06_18-10-43
  done: false
  episode_len_mean: 73.29
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.67
  episode_reward_min: 0.0
  episodes_this_iter: 5
  episodes_total: 1113
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 1.2933816605144077
          entropy_coeff: 0.009999999999999998
          kl: 0.020071450099794833
          policy_loss: -0.056552247868643865
          total_loss: 0.07029434757100211
          vf_explained_var: 0.9293187260627747
          vf_loss: 0.13543518282887007
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
  iterations_since_restore: 345
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,345,14408.3,345000,9.67,10,0,73.29




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-06_18-15-56
  done: false
  episode_len_mean: 73.38
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.7
  episode_reward_min: 0.0
  episodes_this_iter: 17
  episodes_total: 1130
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 0.934649983048439
          entropy_coeff: 0.009999999999999998
          kl: 0.004268894003203528
          policy_loss: 0.009153933823108673
          total_loss: 0.045312822196218704
          vf_explained_var: 0.32941216230392456
          vf_loss: 0.044119143494430725
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  iterations_since_restore: 346
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,346,14721,346000,9.7,10,0,73.38




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-06_18-21-48
  done: false
  episode_len_mean: 67.31
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.75
  episode_reward_min: 0.0
  episodes_this_iter: 18
  episodes_total: 1148
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 0.31344637274742126
          entropy_coeff: 0.009999999999999998
          kl: 0.004869590989207712
          policy_loss: -0.18380817787514792
          total_loss: -0.1259434224002891
          vf_explained_var: 0.9936785101890564
          vf_loss: 0.06020855935704377
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  iterations_since_restore: 347
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,347,15072.7,347000,9.75,10,0,67.31




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-06_18-28-37
  done: false
  episode_len_mean: 72.14
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.75
  episode_reward_min: 0.0
  episodes_this_iter: 22
  episodes_total: 1170
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0811830256367102
          cur_lr: 5.000000000000001e-05
          entropy: 0.5685383621189329
          entropy_coeff: 0.009999999999999998
          kl: 0.017116240109377075
          policy_loss: 0.028635855639974277
          total_loss: 0.04020042291118039
          vf_explained_var: 0.998080849647522
          vf_loss: 0.015860402351245285
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  iterations_since_restore: 348
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,348,15481.9,348000,9.75,10,0,72.14




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-06_18-36-05
  done: false
  episode_len_mean: 64.91
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.77
  episode_reward_min: 0.0
  episodes_this_iter: 23
  episodes_total: 1193
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0811830256367102
          cur_lr: 5.000000000000001e-05
          entropy: 0.4976729267173343
          entropy_coeff: 0.009999999999999998
          kl: 0.007537244277320513
          policy_loss: 0.00835903427667088
          total_loss: 0.0977929233883818
          vf_explained_var: 0.9924057126045227
          vf_loss: 0.09379872118847238
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  iterations_since_restore: 349
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,349,15930,349000,9.77,10,0,64.91




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-06_18-43-55
  done: false
  episode_len_mean: 44.35
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.96
  episode_reward_min: 8.0
  episodes_this_iter: 24
  episodes_total: 1217
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0811830256367102
          cur_lr: 5.000000000000001e-05
          entropy: 0.2672711397210757
          entropy_coeff: 0.009999999999999998
          kl: 0.003156071503932682
          policy_loss: -0.14335287974940406
          total_loss: -0.11251796326703495
          vf_explained_var: 0.9944213032722473
          vf_loss: 0.03325140828059779
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  iterations_since_restore: 350
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,350,16399.5,350000,9.96,10,8,44.35




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-06_18-51-56
  done: false
  episode_len_mean: 45.62
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.93
  episode_reward_min: 8.0
  episodes_this_iter: 25
  episodes_total: 1242
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0405915128183551
          cur_lr: 5.000000000000001e-05
          entropy: 0.34249284168084465
          entropy_coeff: 0.009999999999999998
          kl: 0.008640013394516978
          policy_loss: -0.01911449788345231
          total_loss: 0.0036792139212290446
          vf_explained_var: 0.9962595701217651
          vf_loss: 0.02586792834723989
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  iterations_since_restore: 351
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,351,16880.9,351000,9.93,10,8,45.62




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-06_18-59-49
  done: false
  episode_len_mean: 41.62
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.91
  episode_reward_min: 7.0
  episodes_this_iter: 24
  episodes_total: 1266
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0405915128183551
          cur_lr: 5.000000000000001e-05
          entropy: 0.43699824710687
          entropy_coeff: 0.009999999999999998
          kl: 0.013334546803412966
          policy_loss: 0.05538976403574149
          total_loss: 0.11893540012339751
          vf_explained_var: 0.9935937523841858
          vf_loss: 0.06737435165171822
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  iterations_since_restore: 352
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,352,17353.4,352000,9.91,10,7,41.62




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-06_19-04-31
  done: false
  episode_len_mean: 45.7
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.89
  episode_reward_min: 7.0
  episodes_this_iter: 15
  episodes_total: 1281
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0405915128183551
          cur_lr: 5.000000000000001e-05
          entropy: 1.1620630704694324
          entropy_coeff: 0.009999999999999998
          kl: 0.027190717072608964
          policy_loss: 0.12025279468960232
          total_loss: 0.1480346476038297
          vf_explained_var: 0.9960489273071289
          vf_loss: 0.03829877431400948
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  iterations_since_restore: 353
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,353,17636,353000,9.89,10,7,45.7




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-06_19-11-36
  done: false
  episode_len_mean: 44.65
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.91
  episode_reward_min: 7.0
  episodes_this_iter: 22
  episodes_total: 1303
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 0.29364401631885106
          entropy_coeff: 0.009999999999999998
          kl: 0.010418733690456166
          policy_loss: -0.2544620268874698
          total_loss: -0.23633202090859412
          vf_explained_var: 0.9967111349105835
          vf_loss: 0.020432078807304303
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  iterations_since_restore: 354
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,354,18060.7,354000,9.91,10,7,44.65




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-06_19-15-00
  done: false
  episode_len_mean: 51.93
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 4.0
  episodes_this_iter: 11
  episodes_total: 1314
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 1.0241462906201682
          entropy_coeff: 0.009999999999999998
          kl: 0.02139121975629775
          policy_loss: 0.11349379635519452
          total_loss: 0.19223201647400856
          vf_explained_var: 0.4390571713447571
          vf_loss: 0.08767722740562425
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
  iterations_since_restore: 355
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,355,18265,355000,9.85,10,4,51.93




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-06_19-23-25
  done: false
  episode_len_mean: 50.74
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.88
  episode_reward_min: 4.0
  episodes_this_iter: 26
  episodes_total: 1340
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09133090384129905
          cur_lr: 5.000000000000001e-05
          entropy: 0.2582437339756224
          entropy_coeff: 0.009999999999999998
          kl: 0.005683721345262743
          policy_loss: -0.013356426192654505
          total_loss: -0.004285883179141415
          vf_explained_var: 0.998159646987915
          vf_loss: 0.01113388143065903
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
  iterations_since_restore: 356
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,356,18769.8,356000,9.88,10,4,50.74




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-06_19-31-25
  done: false
  episode_len_mean: 50.09
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.91
  episode_reward_min: 4.0
  episodes_this_iter: 25
  episodes_total: 1365
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09133090384129905
          cur_lr: 5.000000000000001e-05
          entropy: 0.25346632964081234
          entropy_coeff: 0.009999999999999998
          kl: 0.006024752538576836
          policy_loss: 0.040440792300634916
          total_loss: 0.06413704496290949
          vf_explained_var: 0.9965187907218933
          vf_loss: 0.025680671616767844
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  iterations_since_restore: 357
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,357,19249.5,357000,9.91,10,4,50.09




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-06_19-39-58
  done: false
  episode_len_mean: 45.91
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.93
  episode_reward_min: 4.0
  episodes_this_iter: 27
  episodes_total: 1392
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09133090384129905
          cur_lr: 5.000000000000001e-05
          entropy: 0.23473120563560063
          entropy_coeff: 0.009999999999999998
          kl: 0.003693263329881279
          policy_loss: -0.046794688618845406
          total_loss: -0.0394003431002299
          vf_explained_var: 0.9983786940574646
          vf_loss: 0.009404347192806502
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  iterations_since_restore: 358
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,358,19762.6,358000,9.93,10,4,45.91




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-06_19-40-28
  done: false
  episode_len_mean: 55.09
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.9
  episode_reward_min: 4.0
  episodes_this_iter: 2
  episodes_total: 1394
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.045665451920649525
          cur_lr: 5.000000000000001e-05
          entropy: 0.6228445318010118
          entropy_coeff: 0.009999999999999998
          kl: 0.015967176336345704
          policy_loss: -0.03839170361558596
          total_loss: 0.0035793816877735985
          vf_explained_var: 0.2624513506889343
          vf_loss: 0.04747038388417827
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
  iterations_since_restore: 359
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,359,19792.5,359000,9.9,10,4,55.09




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-06_19-48-45
  done: false
  episode_len_mean: 47.53
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.96
  episode_reward_min: 7.0
  episodes_this_iter: 26
  episodes_total: 1420
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.045665451920649525
          cur_lr: 5.000000000000001e-05
          entropy: 0.24133872704373466
          entropy_coeff: 0.009999999999999998
          kl: 0.0031680704849618674
          policy_loss: -0.022601399425831108
          total_loss: -0.00855522229232722
          vf_explained_var: 0.9968703389167786
          vf_loss: 0.01631489291580187
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 360
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,360,20289.3,360000,9.96,10,7,47.53




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-06_19-57-23
  done: false
  episode_len_mean: 47.35
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.96
  episode_reward_min: 7.0
  episodes_this_iter: 27
  episodes_total: 1447
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 0.22329652425315644
          entropy_coeff: 0.009999999999999998
          kl: 0.004372054165244046
          policy_loss: 0.0268270765327745
          total_loss: 0.031395075822042096
          vf_explained_var: 0.9988997578620911
          vf_loss: 0.006701138049053649
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  iterations_since_restore: 361
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,361,20807.3,361000,9.96,10,7,47.35




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-06_20-05-22
  done: false
  episode_len_mean: 46.88
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.97
  episode_reward_min: 7.0
  episodes_this_iter: 25
  episodes_total: 1472
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011416362980162381
          cur_lr: 5.000000000000001e-05
          entropy: 0.25124536520904966
          entropy_coeff: 0.009999999999999998
          kl: 0.011935507297087809
          policy_loss: -0.2018683296110895
          total_loss: -0.20152758326795367
          vf_explained_var: 0.9996016621589661
          vf_loss: 0.002716939706200113
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  iterations_since_restore: 362
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,362,21286.7,362000,9.97,10,7,46.88




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-06_20-09-04
  done: false
  episode_len_mean: 52.87
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.94
  episode_reward_min: 7.0
  episodes_this_iter: 12
  episodes_total: 1484
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011416362980162381
          cur_lr: 5.000000000000001e-05
          entropy: 1.0111489252911674
          entropy_coeff: 0.009999999999999998
          kl: 0.09309087448011921
          policy_loss: 0.06121718337138494
          total_loss: 0.11511788583464093
          vf_explained_var: 0.8217215538024902
          vf_loss: 0.06294942787951893
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  iterations_since_restore: 363
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,363,21508.2,363000,9.94,10,7,52.87




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-06_20-16-08
  done: false
  episode_len_mean: 44.26
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.97
  episode_reward_min: 7.0
  episodes_this_iter: 22
  episodes_total: 1506
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017124544470243558
          cur_lr: 5.000000000000001e-05
          entropy: 0.34880547324816386
          entropy_coeff: 0.009999999999999998
          kl: 0.012192021968624431
          policy_loss: -0.24423770159482955
          total_loss: -0.2389435009823905
          vf_explained_var: 0.9988594651222229
          vf_loss: 0.008573471609916951
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  iterations_since_restore: 364
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,364,21932.3,364000,9.97,10,7,44.26




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-06_20-18-20
  done: false
  episode_len_mean: 52.38
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.84
  episode_reward_min: 0.0
  episodes_this_iter: 8
  episodes_total: 1514
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017124544470243558
          cur_lr: 5.000000000000001e-05
          entropy: 1.2170654773712157
          entropy_coeff: 0.009999999999999998
          kl: 0.05717110140088937
          policy_loss: 0.07569731324911118
          total_loss: 0.25001439654992685
          vf_explained_var: 0.8176006078720093
          vf_loss: 0.18550870596534677
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  iterations_since_restore: 365
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,365,22064.1,365000,9.84,10,0,52.38




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-06_20-19-17
  done: false
  episode_len_mean: 57.25
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.81
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1517
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.025686816705365344
          cur_lr: 5.000000000000001e-05
          entropy: 1.3604958666695488
          entropy_coeff: 0.009999999999999998
          kl: 0.01775120083598891
          policy_loss: -0.027449126541614532
          total_loss: 0.04068652126524183
          vf_explained_var: 0.8851577639579773
          vf_loss: 0.08128463530706034
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  iterations_since_restore: 366
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,366,22121.9,366000,9.81,10,0,57.25




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-06_20-23-55
  done: false
  episode_len_mean: 65.08
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.75
  episode_reward_min: 0.0
  episodes_this_iter: 16
  episodes_total: 1533
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.025686816705365344
          cur_lr: 5.000000000000001e-05
          entropy: 1.0274640841616525
          entropy_coeff: 0.009999999999999998
          kl: 0.03459798660509487
          policy_loss: 0.11637818449073367
          total_loss: 0.19708209799395668
          vf_explained_var: 0.9899064898490906
          vf_loss: 0.09008984229423933
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
  iterations_since_restore: 367
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,367,22399.4,367000,9.75,10,0,65.08




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-06_20-27-17
  done: false
  episode_len_mean: 71.17
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.72
  episode_reward_min: 0.0
  episodes_this_iter: 11
  episodes_total: 1544
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.038530225058048026
          cur_lr: 5.000000000000001e-05
          entropy: 1.4733178933461508
          entropy_coeff: 0.009999999999999998
          kl: 0.019008380085077253
          policy_loss: 0.17401915374729368
          total_loss: 0.2454905522366365
          vf_explained_var: 0.8116285800933838
          vf_loss: 0.08547217866612805
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  iterations_since_restore: 368
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,368,22601,368000,9.72,10,0,71.17




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-06_20-31-19
  done: false
  episode_len_mean: 76.09
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.68
  episode_reward_min: 0.0
  episodes_this_iter: 13
  episodes_total: 1557
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.038530225058048026
          cur_lr: 5.000000000000001e-05
          entropy: 0.7792815446853638
          entropy_coeff: 0.009999999999999998
          kl: 0.017304321238883296
          policy_loss: -0.20369255741437275
          total_loss: -0.12054287360774146
          vf_explained_var: 0.788899838924408
          vf_loss: 0.09027575680779086
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  iterations_since_restore: 369
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,369,22843.2,369000,9.68,10,0,76.09




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-06_20-34-43
  done: false
  episode_len_mean: 82.04
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.61
  episode_reward_min: 0.0
  episodes_this_iter: 11
  episodes_total: 1568
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.038530225058048026
          cur_lr: 5.000000000000001e-05
          entropy: 1.2527018556992213
          entropy_coeff: 0.009999999999999998
          kl: 0.05575265422513931
          policy_loss: 0.0315584518843227
          total_loss: 0.19943377210034263
          vf_explained_var: 0.7655395269393921
          vf_loss: 0.1782541724542777
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  iterations_since_restore: 370
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,370,23047.8,370000,9.61,10,0,82.04




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-06_20-39-43
  done: false
  episode_len_mean: 79.96
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.56
  episode_reward_min: 0.0
  episodes_this_iter: 16
  episodes_total: 1584
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.057795337587072004
          cur_lr: 5.000000000000001e-05
          entropy: 0.8193633917305204
          entropy_coeff: 0.009999999999999998
          kl: 0.01416319395161793
          policy_loss: -0.08643087281121148
          total_loss: 0.02964731156826019
          vf_explained_var: 0.9866622686386108
          vf_loss: 0.12345324847847224
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000
  iterations_since_restore: 371
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,371,23347,371000,9.56,10,0,79.96




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-06_20-41-52
  done: false
  episode_len_mean: 86.6
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.49
  episode_reward_min: 0.0
  episodes_this_iter: 7
  episodes_total: 1591
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.057795337587072004
          cur_lr: 5.000000000000001e-05
          entropy: 1.335556408762932
          entropy_coeff: 0.009999999999999998
          kl: 0.02170101068034565
          policy_loss: -0.007849726660384072
          total_loss: 0.10754850059747696
          vf_explained_var: 0.8223041892051697
          vf_loss: 0.1274995766994026
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  iterations_since_restore: 372
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,372,23476.6,372000,9.49,10,0,86.6




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-06_20-45-55
  done: false
  episode_len_mean: 91.9
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.44
  episode_reward_min: 0.0
  episodes_this_iter: 13
  episodes_total: 1604
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 1.103781510061688
          entropy_coeff: 0.009999999999999998
          kl: 0.01478533778307543
          policy_loss: 0.0763790488243103
          total_loss: 0.15892019669214885
          vf_explained_var: 0.7407925128936768
          vf_loss: 0.09229718140429921
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
  iterations_since_restore: 373
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,373,23719.8,373000,9.44,10,0,91.9




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-06_20-50-56
  done: false
  episode_len_mean: 78.59
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.61
  episode_reward_min: 2.0
  episodes_this_iter: 16
  episodes_total: 1620
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 0.6606071213881175
          entropy_coeff: 0.009999999999999998
          kl: 0.015108964200095793
          policy_loss: 0.02888521800438563
          total_loss: 0.37417034982807107
          vf_explained_var: 0.9391394853591919
          vf_loss: 0.3505813599460655
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  iterations_since_restore: 374
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,374,24020.2,374000,9.61,10,2,78.59




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-06_20-54-37
  done: false
  episode_len_mean: 80.53
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.61
  episode_reward_min: 2.0
  episodes_this_iter: 12
  episodes_total: 1632
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08669300638060806
          cur_lr: 5.000000000000001e-05
          entropy: 1.3229487790001764
          entropy_coeff: 0.009999999999999998
          kl: 0.032874231064178715
          policy_loss: 0.1208570903374089
          total_loss: 0.26767173380487497
          vf_explained_var: 0.7286304235458374
          vf_loss: 0.15719416936238606
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  iterations_since_restore: 375
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,375,24241.4,375000,9.61,10,2,80.53




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-06_20-59-55
  done: false
  episode_len_mean: 78.38
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.64
  episode_reward_min: 2.0
  episodes_this_iter: 16
  episodes_total: 1648
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1300395095709121
          cur_lr: 5.000000000000001e-05
          entropy: 0.6609452264176474
          entropy_coeff: 0.009999999999999998
          kl: 0.01841836120829332
          policy_loss: -0.06492472274435891
          total_loss: 0.03979549258947372
          vf_explained_var: 0.9864202737808228
          vf_loss: 0.10893455321590105
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  iterations_since_restore: 376
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,376,24559.3,376000,9.64,10,2,78.38




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-06_21-03-59
  done: false
  episode_len_mean: 78.91
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.67
  episode_reward_min: 2.0
  episodes_this_iter: 12
  episodes_total: 1660
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1300395095709121
          cur_lr: 5.000000000000001e-05
          entropy: 1.253672577937444
          entropy_coeff: 0.009999999999999998
          kl: 0.015243203773184572
          policy_loss: 0.10960959361659156
          total_loss: 0.2336595892906189
          vf_explained_var: 0.9848577976226807
          vf_loss: 0.13460450602902307
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
  iterations_since_restore: 377
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,377,24802.9,377000,9.67,10,2,78.91




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-06_21-11-59
  done: false
  episode_len_mean: 64.84
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.89
  episode_reward_min: 5.0
  episodes_this_iter: 25
  episodes_total: 1685
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1300395095709121
          cur_lr: 5.000000000000001e-05
          entropy: 0.40666637155744767
          entropy_coeff: 0.009999999999999998
          kl: 0.006531729705457407
          policy_loss: 0.02424335161017047
          total_loss: 0.07864961102604866
          vf_explained_var: 0.9928490519523621
          vf_loss: 0.05762354058937894
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  iterations_since_restore: 378
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,378,25283.4,378000,9.89,10,5,64.84




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-06_21-15-26
  done: false
  episode_len_mean: 63.14
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.86
  episode_reward_min: 3.0
  episodes_this_iter: 11
  episodes_total: 1696
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1300395095709121
          cur_lr: 5.000000000000001e-05
          entropy: 1.0265043911006715
          entropy_coeff: 0.009999999999999998
          kl: 0.0165181693843626
          policy_loss: 0.08944274170531166
          total_loss: 0.205848887645536
          vf_explained_var: 0.927069365978241
          vf_loss: 0.12452317993674013
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  iterations_since_restore: 379
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,379,25490.6,379000,9.86,10,3,63.14




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-06_21-20-23
  done: false
  episode_len_mean: 62.46
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 3.0
  episodes_this_iter: 15
  episodes_total: 1711
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1300395095709121
          cur_lr: 5.000000000000001e-05
          entropy: 0.6266145394908057
          entropy_coeff: 0.009999999999999998
          kl: 0.008050143742780985
          policy_loss: -0.1424001806312137
          total_loss: 0.06404493128259976
          vf_explained_var: 0.9794126749038696
          vf_loss: 0.21166442024211088
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  iterations_since_restore: 380
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,380,25787.3,380000,9.85,10,3,62.46




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-06_21-23-14
  done: false
  episode_len_mean: 69.71
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.82
  episode_reward_min: 3.0
  episodes_this_iter: 10
  episodes_total: 1721
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1300395095709121
          cur_lr: 5.000000000000001e-05
          entropy: 0.9878618127769894
          entropy_coeff: 0.009999999999999998
          kl: 0.007833253659697528
          policy_loss: 0.014386291553576788
          total_loss: 0.12120325615008672
          vf_explained_var: 0.7308746576309204
          vf_loss: 0.1156769511807296
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  iterations_since_restore: 381
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,381,25958.4,381000,9.82,10,3,69.71




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-06_21-31-21
  done: false
  episode_len_mean: 62.61
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 3.0
  episodes_this_iter: 25
  episodes_total: 1746
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1300395095709121
          cur_lr: 5.000000000000001e-05
          entropy: 0.26600875324673123
          entropy_coeff: 0.009999999999999998
          kl: 0.0027745040092967423
          policy_loss: -0.06332759050031503
          total_loss: -0.042220966600709495
          vf_explained_var: 0.9954119920730591
          vf_loss: 0.023405914639847147
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  iterations_since_restore: 382
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,382,26445,382000,9.85,10,3,62.61




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-06_21-37-14
  done: false
  episode_len_mean: 58.08
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.84
  episode_reward_min: 3.0
  episodes_this_iter: 18
  episodes_total: 1764
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06501975478545605
          cur_lr: 5.000000000000001e-05
          entropy: 0.6926059785816404
          entropy_coeff: 0.009999999999999998
          kl: 0.007327165161113866
          policy_loss: -0.12404008482893308
          total_loss: -0.04505416295594639
          vf_explained_var: 0.9910990595817566
          vf_loss: 0.08543557077646255
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  iterations_since_restore: 383
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,383,26798.5,383000,9.84,10,3,58.08




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-06_21-42-54
  done: false
  episode_len_mean: 60.14
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.78
  episode_reward_min: 3.0
  episodes_this_iter: 18
  episodes_total: 1782
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06501975478545605
          cur_lr: 5.000000000000001e-05
          entropy: 0.39117797447575464
          entropy_coeff: 0.009999999999999998
          kl: 0.06510793242347525
          policy_loss: 0.00912994361586041
          total_loss: 0.1887824700938331
          vf_explained_var: 0.9747409224510193
          vf_loss: 0.17933101339472665
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
  iterations_since_restore: 384
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,384,27138.3,384000,9.78,10,3,60.14




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-06_21-49-44
  done: false
  episode_len_mean: 56.55
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.87
  episode_reward_min: 4.0
  episodes_this_iter: 21
  episodes_total: 1803
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.5728282246324751
          entropy_coeff: 0.009999999999999998
          kl: 0.018087116431635786
          policy_loss: -0.012523453517092598
          total_loss: 0.22093340020833743
          vf_explained_var: 0.9620319604873657
          vf_loss: 0.23742110609180397
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  iterations_since_restore: 385
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,385,27548.1,385000,9.87,10,4,56.55




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-06_21-57-29
  done: false
  episode_len_mean: 47.59
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.91
  episode_reward_min: 4.0
  episodes_this_iter: 24
  episodes_total: 1827
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.28285323414537644
          entropy_coeff: 0.009999999999999998
          kl: 0.003976976986833853
          policy_loss: -0.1807877336939176
          total_loss: -0.16519327892197502
          vf_explained_var: 0.9968392252922058
          vf_loss: 0.018035114980820152
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  iterations_since_restore: 386
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,386,28012.9,386000,9.91,10,4,47.59




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-06_22-02-47
  done: false
  episode_len_mean: 50.87
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.89
  episode_reward_min: 4.0
  episodes_this_iter: 17
  episodes_total: 1844
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.048764816089092014
          cur_lr: 5.000000000000001e-05
          entropy: 0.7027947339746687
          entropy_coeff: 0.009999999999999998
          kl: 0.01762935468869205
          policy_loss: 0.004677773349814945
          total_loss: 0.016135027011235554
          vf_explained_var: 0.9182518124580383
          vf_loss: 0.01762550801763104
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  iterations_since_restore: 387
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,387,28330.7,387000,9.89,10,4,50.87




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-06_22-07-50
  done: false
  episode_len_mean: 52.53
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.78
  episode_reward_min: 0.0
  episodes_this_iter: 16
  episodes_total: 1860
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.048764816089092014
          cur_lr: 5.000000000000001e-05
          entropy: 0.8846079409122467
          entropy_coeff: 0.009999999999999998
          kl: 0.07989766662162678
          policy_loss: 0.05912291788392597
          total_loss: 0.3715957178837723
          vf_explained_var: 0.7693992853164673
          vf_loss: 0.317422683040301
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  iterations_since_restore: 388
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,388,28633.9,388000,9.78,10,0,52.53




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-06_22-16-13
  done: false
  episode_len_mean: 46.51
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 0.0
  episodes_this_iter: 26
  episodes_total: 1886
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07314722413363806
          cur_lr: 5.000000000000001e-05
          entropy: 0.20162336048152713
          entropy_coeff: 0.009999999999999998
          kl: 0.015150824244477541
          policy_loss: -0.07436487591928906
          total_loss: -0.06163956771294276
          vf_explained_var: 0.9973024129867554
          vf_loss: 0.013633302873414424
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  iterations_since_restore: 389
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,389,29136.8,389000,9.85,10,0,46.51




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-06_22-24-51
  done: false
  episode_len_mean: 46.33
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 0.0
  episodes_this_iter: 27
  episodes_total: 1913
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07314722413363806
          cur_lr: 5.000000000000001e-05
          entropy: 0.20793638891643948
          entropy_coeff: 0.009999999999999998
          kl: 0.008871403669518527
          policy_loss: -0.06411224024163352
          total_loss: -0.061553855240345
          vf_explained_var: 0.9992354512214661
          vf_loss: 0.00398883189385136
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  iterations_since_restore: 390
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,390,29655,390000,9.85,10,0,46.33




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-06_22-32-57
  done: false
  episode_len_mean: 45.68
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.86
  episode_reward_min: 0.0
  episodes_this_iter: 25
  episodes_total: 1938
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07314722413363806
          cur_lr: 5.000000000000001e-05
          entropy: 0.23565107037623723
          entropy_coeff: 0.009999999999999998
          kl: 0.003131726311314588
          policy_loss: 0.002514624243809117
          total_loss: 0.03560446392123898
          vf_explained_var: 0.993043839931488
          vf_loss: 0.03521727385102875
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  iterations_since_restore: 391
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,391,30140.8,391000,9.86,10,0,45.68




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-06_22-35-42
  done: false
  episode_len_mean: 48.89
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 0.0
  episodes_this_iter: 9
  episodes_total: 1947
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03657361206681903
          cur_lr: 5.000000000000001e-05
          entropy: 1.2549117237329483
          entropy_coeff: 0.009999999999999998
          kl: 0.030202076990202366
          policy_loss: -0.07803759053349495
          total_loss: -0.017236311485370002
          vf_explained_var: 0.34683161973953247
          vf_loss: 0.07224579819788536
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  iterations_since_restore: 392
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,392,30305.5,392000,9.85,10,0,48.89




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-06_22-42-31
  done: false
  episode_len_mean: 45.28
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.96
  episode_reward_min: 7.0
  episodes_this_iter: 21
  episodes_total: 1968
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.054860418100228535
          cur_lr: 5.000000000000001e-05
          entropy: 0.28307517055008147
          entropy_coeff: 0.009999999999999998
          kl: 0.004730882896089413
          policy_loss: -0.28368813577625485
          total_loss: -0.27106925861703024
          vf_explained_var: 0.9978623390197754
          vf_loss: 0.015190090689187249
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  iterations_since_restore: 393
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,393,30715.3,393000,9.96,10,7,45.28




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-06_22-46-35
  done: false
  episode_len_mean: 51.84
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.93
  episode_reward_min: 7.0
  episodes_this_iter: 13
  episodes_total: 1981
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.027430209050114267
          cur_lr: 5.000000000000001e-05
          entropy: 1.1735174417495728
          entropy_coeff: 0.009999999999999998
          kl: 0.06465250921009372
          policy_loss: 0.088569505016009
          total_loss: 0.13901454353084167
          vf_explained_var: 0.6931113600730896
          vf_loss: 0.06040678092588981
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  iterations_since_restore: 394
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,394,30958.6,394000,9.93,10,7,51.84




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-06_22-54-58
  done: false
  episode_len_mean: 52.08
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.94
  episode_reward_min: 7.0
  episodes_this_iter: 26
  episodes_total: 2007
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04114531357517139
          cur_lr: 5.000000000000001e-05
          entropy: 0.21883991791142357
          entropy_coeff: 0.009999999999999998
          kl: 0.0024702566102803598
          policy_loss: -0.04197828186055024
          total_loss: -0.0284561600536108
          vf_explained_var: 0.9973160624504089
          vf_loss: 0.015608881465676757
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  iterations_since_restore: 395
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,395,31462,395000,9.94,10,7,52.08




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-06_23-03-37
  done: false
  episode_len_mean: 51.8
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.94
  episode_reward_min: 7.0
  episodes_this_iter: 27
  episodes_total: 2034
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.020572656787585696
          cur_lr: 5.000000000000001e-05
          entropy: 0.17748646719588174
          entropy_coeff: 0.009999999999999998
          kl: 0.005161419672021273
          policy_loss: -0.030744417756795882
          total_loss: -0.0281711146235466
          vf_explained_var: 0.9992408156394958
          vf_loss: 0.004241984031007936
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  iterations_since_restore: 396
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,396,31980.6,396000,9.94,10,7,51.8




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-06_23-12-00
  done: false
  episode_len_mean: 45.07
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.97
  episode_reward_min: 7.0
  episodes_this_iter: 26
  episodes_total: 2060
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.020572656787585696
          cur_lr: 5.000000000000001e-05
          entropy: 0.1942689738339848
          entropy_coeff: 0.009999999999999998
          kl: 0.0041083370982254825
          policy_loss: -0.10451469851864709
          total_loss: -0.1025053944852617
          vf_explained_var: 0.9993113279342651
          vf_loss: 0.0038674723809688457
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  iterations_since_restore: 397
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,397,32484.1,397000,9.97,10,7,45.07




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-06_23-19-46
  done: false
  episode_len_mean: 38.94
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 10.0
  episode_reward_min: 10.0
  episodes_this_iter: 24
  episodes_total: 2084
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010286328393792848
          cur_lr: 5.000000000000001e-05
          entropy: 0.2729147232241101
          entropy_coeff: 0.009999999999999998
          kl: 0.005528151017674767
          policy_loss: -0.06458315683735741
          total_loss: -0.06149415916038884
          vf_explained_var: 0.999154806137085
          vf_loss: 0.005761279114004638
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
  iterations_since_restore: 398
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,398,32949.8,398000,10,10,10,38.94




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-06_23-25-39
  done: false
  episode_len_mean: 38.95
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 10.0
  episode_reward_min: 10.0
  episodes_this_iter: 18
  episodes_total: 2102
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010286328393792848
          cur_lr: 5.000000000000001e-05
          entropy: 0.3571313351392746
          entropy_coeff: 0.009999999999999998
          kl: 0.004444099297633791
          policy_loss: -0.2232437307635943
          total_loss: -0.1810851232873069
          vf_explained_var: 0.9960237145423889
          vf_loss: 0.04568420670305689
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  iterations_since_restore: 399
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,399,33303,399000,10,10,10,38.95


Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-06_23-25-56
  done: false
  episode_len_mean: 45.89
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.95
  episode_reward_min: 5.0
  episodes_this_iter: 1
  episodes_total: 2103
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005143164196896424
          cur_lr: 5.000000000000001e-05
          entropy: 1.7887945625517103
          entropy_coeff: 0.009999999999999998
          kl: 0.022585909833293102
          policy_loss: -0.05887875370681286
          total_loss: -0.011044324396385087
          vf_explained_var: 0.7905305624008179
          vf_loss: 0.06560621394051445
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  iterations_since_restore: 400
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,400,33320,400000,9.95,10,5,45.89




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-06_23-33-37
  done: false
  episode_len_mean: 52.15
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.92
  episode_reward_min: 5.0
  episodes_this_iter: 25
  episodes_total: 2128
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007714746295344637
          cur_lr: 5.000000000000001e-05
          entropy: 0.41547826098071206
          entropy_coeff: 0.009999999999999998
          kl: 0.010782521084633127
          policy_loss: 0.028601248231199052
          total_loss: 0.052595579044686425
          vf_explained_var: 0.9961450099945068
          vf_loss: 0.028065928128651448
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  iterations_since_restore: 401
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,401,33781,401000,9.92,10,5,52.15




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-06_23-38-38
  done: false
  episode_len_mean: 52.1
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.92
  episode_reward_min: 5.0
  episodes_this_iter: 15
  episodes_total: 2143
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007714746295344637
          cur_lr: 5.000000000000001e-05
          entropy: 0.582109675473637
          entropy_coeff: 0.009999999999999998
          kl: 0.024385273652444105
          policy_loss: -0.29414747489823234
          total_loss: -0.25669565399487815
          vf_explained_var: 0.9951498508453369
          vf_loss: 0.04308479121989674
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  iterations_since_restore: 402
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,402,34081.3,402000,9.92,10,5,52.1




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-06_23-46-01
  done: false
  episode_len_mean: 57.31
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.88
  episode_reward_min: 5.0
  episodes_this_iter: 24
  episodes_total: 2167
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011572119443016953
          cur_lr: 5.000000000000001e-05
          entropy: 0.4207546404666371
          entropy_coeff: 0.009999999999999998
          kl: 0.019580190558530976
          policy_loss: 0.04810736080010732
          total_loss: 0.06669452024830712
          vf_explained_var: 0.9977976679801941
          vf_loss: 0.022568118686063423
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iterations_since_restore: 403
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,403,34525,403000,9.88,10,5,57.31




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-06_23-52-32
  done: false
  episode_len_mean: 56.28
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.88
  episode_reward_min: 5.0
  episodes_this_iter: 20
  episodes_total: 2187
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011572119443016953
          cur_lr: 5.000000000000001e-05
          entropy: 0.21538236141204833
          entropy_coeff: 0.009999999999999998
          kl: 0.007138885377988046
          policy_loss: -0.2943386336167653
          total_loss: -0.266014274292522
          vf_explained_var: 0.9958807229995728
          vf_loss: 0.030395570190416443
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iterations_since_restore: 404
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,404,34915.3,404000,9.88,10,5,56.28




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-06_23-56-50
  done: false
  episode_len_mean: 63.37
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.82
  episode_reward_min: 4.0
  episodes_this_iter: 14
  episodes_total: 2201
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011572119443016953
          cur_lr: 5.000000000000001e-05
          entropy: 1.0142738216453129
          entropy_coeff: 0.009999999999999998
          kl: 0.02267104432239507
          policy_loss: 0.10285578436321682
          total_loss: 0.16348659329944187
          vf_explained_var: 0.6835916042327881
          vf_loss: 0.07051119226962328
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iterations_since_restore: 405
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,405,35174.1,405000,9.82,10,4,63.37




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-07_00-04-02
  done: false
  episode_len_mean: 50.14
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.9
  episode_reward_min: 4.0
  episodes_this_iter: 22
  episodes_total: 2223
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01735817916452543
          cur_lr: 5.000000000000001e-05
          entropy: 0.17181754062573115
          entropy_coeff: 0.009999999999999998
          kl: 0.004507616438439873
          policy_loss: -0.2916649442580011
          total_loss: -0.28800704562001755
          vf_explained_var: 0.999212384223938
          vf_loss: 0.005297826062370506
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  iterations_since_restore: 406
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,406,35605.8,406000,9.9,10,4,50.14




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-07_00-08-41
  done: false
  episode_len_mean: 56.3
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.87
  episode_reward_min: 4.0
  episodes_this_iter: 15
  episodes_total: 2238
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008679089582262715
          cur_lr: 5.000000000000001e-05
          entropy: 1.2663153110278977
          entropy_coeff: 0.009999999999999998
          kl: 0.03499292578013719
          policy_loss: 0.06959822575251261
          total_loss: 0.07533338864644369
          vf_explained_var: 0.6533120274543762
          vf_loss: 0.01809460975540181
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  iterations_since_restore: 407
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,407,35884.1,407000,9.87,10,4,56.3




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-07_00-17-19
  done: false
  episode_len_mean: 50.84
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.91
  episode_reward_min: 4.0
  episodes_this_iter: 27
  episodes_total: 2265
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013018634373394069
          cur_lr: 5.000000000000001e-05
          entropy: 0.15960404723882676
          entropy_coeff: 0.009999999999999998
          kl: 0.00259320979845064
          policy_loss: -0.03164799677001105
          total_loss: -0.023951005045738487
          vf_explained_var: 0.9983144998550415
          vf_loss: 0.009259273481762243
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  iterations_since_restore: 408
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,408,36402.7,408000,9.91,10,4,50.84




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-07_00-26-02
  done: false
  episode_len_mean: 43.46
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.97
  episode_reward_min: 7.0
  episodes_this_iter: 27
  episodes_total: 2292
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006509317186697034
          cur_lr: 5.000000000000001e-05
          entropy: 0.16885686318079632
          entropy_coeff: 0.009999999999999998
          kl: 0.01193101035595807
          policy_loss: 0.022779914860924084
          total_loss: 0.02207124042842123
          vf_explained_var: 0.9998335838317871
          vf_loss: 0.0009022354963235558
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iterations_since_restore: 409
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,409,36925.6,409000,9.97,10,7,43.46




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-07_00-29-44
  done: false
  episode_len_mean: 49.05
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.91
  episode_reward_min: 4.0
  episodes_this_iter: 12
  episodes_total: 2304
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006509317186697034
          cur_lr: 5.000000000000001e-05
          entropy: 0.854605392118295
          entropy_coeff: 0.009999999999999998
          kl: 0.04026606089597012
          policy_loss: -0.09838341606987847
          total_loss: -0.04281732944978608
          vf_explained_var: 0.8120838403701782
          vf_loss: 0.06385003911952178
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  iterations_since_restore: 410
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,410,37147.6,410000,9.91,10,4,49.05




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-07_00-35-20
  done: false
  episode_len_mean: 49.05
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.91
  episode_reward_min: 4.0
  episodes_this_iter: 17
  episodes_total: 2321
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009763975780045554
          cur_lr: 5.000000000000001e-05
          entropy: 0.40508854389190674
          entropy_coeff: 0.009999999999999998
          kl: 0.010848968775318222
          policy_loss: -0.11124309566285875
          total_loss: -0.05707364943292406
          vf_explained_var: 0.9954920411109924
          vf_loss: 0.058114403838084805
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  iterations_since_restore: 411
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,411,37483.5,411000,9.91,10,4,49.05




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-07_00-41-52
  done: false
  episode_len_mean: 48.69
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.88
  episode_reward_min: 4.0
  episodes_this_iter: 21
  episodes_total: 2342
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009763975780045554
          cur_lr: 5.000000000000001e-05
          entropy: 0.6247342692481147
          entropy_coeff: 0.009999999999999998
          kl: 0.036836105736871794
          policy_loss: 0.037269164787398446
          total_loss: 0.08632295355200767
          vf_explained_var: 0.9560325741767883
          vf_loss: 0.054941470010413065
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  iterations_since_restore: 412
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,412,37875.1,412000,9.88,10,4,48.69




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-07_00-47-15
  done: false
  episode_len_mean: 52.03
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 4.0
  episodes_this_iter: 17
  episodes_total: 2359
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01464596367006833
          cur_lr: 5.000000000000001e-05
          entropy: 0.6587796969546212
          entropy_coeff: 0.009999999999999998
          kl: 0.025629075482544384
          policy_loss: 0.09493327902423011
          total_loss: 0.1600319458378686
          vf_explained_var: 0.9908716082572937
          vf_loss: 0.07131110530139671
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  iterations_since_restore: 413
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,413,38198.3,413000,9.85,10,4,52.03




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-07_00-55-53
  done: false
  episode_len_mean: 52.14
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 4.0
  episodes_this_iter: 27
  episodes_total: 2386
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0219689455051025
          cur_lr: 5.000000000000001e-05
          entropy: 0.19342709084351858
          entropy_coeff: 0.009999999999999998
          kl: 0.004434006985433076
          policy_loss: -0.054221560226546395
          total_loss: -0.03961558604819907
          vf_explained_var: 0.9969868063926697
          vf_loss: 0.016442834756647547
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  iterations_since_restore: 414
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,414,38716.5,414000,9.85,10,4,52.14




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-07_01-04-37
  done: false
  episode_len_mean: 46.43
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.91
  episode_reward_min: 4.0
  episodes_this_iter: 27
  episodes_total: 2413
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01098447275255125
          cur_lr: 5.000000000000001e-05
          entropy: 0.17236345592472288
          entropy_coeff: 0.009999999999999998
          kl: 0.004442623286995162
          policy_loss: -0.050802675137917204
          total_loss: -0.047529347323709066
          vf_explained_var: 0.9991070032119751
          vf_loss: 0.0049481606861162515
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
  iterations_since_restore: 415
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,415,39240,415000,9.91,10,4,46.43




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-07_01-13-19
  done: false
  episode_len_mean: 40.47
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.97
  episode_reward_min: 7.0
  episodes_this_iter: 27
  episodes_total: 2440
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005492236376275625
          cur_lr: 5.000000000000001e-05
          entropy: 0.186288135084841
          entropy_coeff: 0.009999999999999998
          kl: 0.007778375548004821
          policy_loss: -0.0024368269162045584
          total_loss: -0.0007827210343546337
          vf_explained_var: 0.9993687868118286
          vf_loss: 0.003474267548881471
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  iterations_since_restore: 416
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,416,39762.8,416000,9.97,10,7,40.47




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-07_01-22-05
  done: false
  episode_len_mean: 37.2
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.99
  episode_reward_min: 9.0
  episodes_this_iter: 27
  episodes_total: 2467
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005492236376275625
          cur_lr: 5.000000000000001e-05
          entropy: 0.2555969364113278
          entropy_coeff: 0.009999999999999998
          kl: 0.006247550312345017
          policy_loss: -0.04023364500866996
          total_loss: -0.028176356106996536
          vf_explained_var: 0.998226523399353
          vf_loss: 0.014578944985340866
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
  iterations_since_restore: 417
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,417,40288.2,417000,9.99,10,9,37.2




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-07_01-30-55
  done: false
  episode_len_mean: 37.25
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.98
  episode_reward_min: 9.0
  episodes_this_iter: 27
  episodes_total: 2494
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005492236376275625
          cur_lr: 5.000000000000001e-05
          entropy: 0.22822539624240662
          entropy_coeff: 0.009999999999999998
          kl: 0.003719363133922708
          policy_loss: 0.03562237839731905
          total_loss: 0.05001556966453791
          vf_explained_var: 0.9975521564483643
          vf_loss: 0.016655015358183946
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
  iterations_since_restore: 418
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,418,40818.4,418000,9.98,10,9,37.25




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-07_01-39-18
  done: false
  episode_len_mean: 37.35
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.98
  episode_reward_min: 9.0
  episodes_this_iter: 26
  episodes_total: 2520
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0027461181881378127
          cur_lr: 5.000000000000001e-05
          entropy: 0.23545740909046597
          entropy_coeff: 0.009999999999999998
          kl: 0.007011463799953526
          policy_loss: -0.01554549526837137
          total_loss: -0.008552145833770433
          vf_explained_var: 0.9986149072647095
          vf_loss: 0.009328671577127858
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  iterations_since_restore: 419
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,419,41321.7,419000,9.98,10,9,37.35




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-07_01-48-23
  done: false
  episode_len_mean: 37.43
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.98
  episode_reward_min: 9.0
  episodes_this_iter: 28
  episodes_total: 2548
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0027461181881378127
          cur_lr: 5.000000000000001e-05
          entropy: 0.17378531065252092
          entropy_coeff: 0.009999999999999998
          kl: 0.006668109540218797
          policy_loss: -0.09052783966892296
          total_loss: -0.0906307159198655
          vf_explained_var: 0.9997170567512512
          vf_loss: 0.0016166655107453052
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
  iterations_since_restore: 420
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,420,41866.4,420000,9.98,10,9,37.43




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-07_01-51-49
  done: false
  episode_len_mean: 43.4
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.94
  episode_reward_min: 5.0
  episodes_this_iter: 10
  episodes_total: 2558
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0027461181881378127
          cur_lr: 5.000000000000001e-05
          entropy: 0.9842894653479258
          entropy_coeff: 0.009999999999999998
          kl: 0.031238585549274875
          policy_loss: 0.037071999328003986
          total_loss: 0.17729925157295334
          vf_explained_var: 0.6485320329666138
          vf_loss: 0.1499843576302131
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
  iterations_since_restore: 421
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,421,42071.9,421000,9.94,10,5,43.4




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-07_02-00-31
  done: false
  episode_len_mean: 43.06
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.95
  episode_reward_min: 5.0
  episodes_this_iter: 27
  episodes_total: 2585
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004119177282206718
          cur_lr: 5.000000000000001e-05
          entropy: 0.15820346110396916
          entropy_coeff: 0.009999999999999998
          kl: 0.016163398766668693
          policy_loss: -0.06541370087199741
          total_loss: -0.06199328348868423
          vf_explained_var: 0.9990152716636658
          vf_loss: 0.004935873369686305
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
  iterations_since_restore: 422
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,422,42594.2,422000,9.95,10,5,43.06




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-07_02-09-37
  done: false
  episode_len_mean: 42.81
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.94
  episode_reward_min: 5.0
  episodes_this_iter: 28
  episodes_total: 2613
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004119177282206718
          cur_lr: 5.000000000000001e-05
          entropy: 0.12674451230300798
          entropy_coeff: 0.009999999999999998
          kl: 0.00343580510502247
          policy_loss: -0.11424134853813384
          total_loss: -0.10466922705786096
          vf_explained_var: 0.9977912902832031
          vf_loss: 0.010825413102025373
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  iterations_since_restore: 423
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,423,43139.8,423000,9.94,10,5,42.81




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-07_02-18-42
  done: false
  episode_len_mean: 42.46
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.94
  episode_reward_min: 5.0
  episodes_this_iter: 28
  episodes_total: 2641
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002059588641103359
          cur_lr: 5.000000000000001e-05
          entropy: 0.13057645724879371
          entropy_coeff: 0.009999999999999998
          kl: 0.0039013712098325626
          policy_loss: -0.021278336892525354
          total_loss: -0.013489455170929431
          vf_explained_var: 0.9979986548423767
          vf_loss: 0.009086610739015871
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  iterations_since_restore: 424
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,424,43685.5,424000,9.94,10,5,42.46




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-07_02-27-43
  done: false
  episode_len_mean: 35.82
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.99
  episode_reward_min: 9.0
  episodes_this_iter: 28
  episodes_total: 2669
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010297943205516795
          cur_lr: 5.000000000000001e-05
          entropy: 0.19145269923739963
          entropy_coeff: 0.009999999999999998
          kl: 0.01568172849232378
          policy_loss: -0.04522459838125441
          total_loss: -0.04438877370622423
          vf_explained_var: 0.9995026588439941
          vf_loss: 0.0027342030752657187
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  iterations_since_restore: 425
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,425,44226.7,425000,9.99,10,9,35.82




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-07_02-36-52
  done: false
  episode_len_mean: 35.73
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.98
  episode_reward_min: 9.0
  episodes_this_iter: 28
  episodes_total: 2697
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010297943205516795
          cur_lr: 5.000000000000001e-05
          entropy: 0.15550366093715032
          entropy_coeff: 0.009999999999999998
          kl: 0.003078783411256245
          policy_loss: -0.08344658650457859
          total_loss: -0.08224405099948247
          vf_explained_var: 0.999416172504425
          vf_loss: 0.0027544029005285766
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000
  iterations_since_restore: 426
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,426,44774.8,426000,9.98,10,9,35.73




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-07_02-40-32
  done: false
  episode_len_mean: 35.67
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.98
  episode_reward_min: 9.0
  episodes_this_iter: 11
  episodes_total: 2708
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005148971602758397
          cur_lr: 5.000000000000001e-05
          entropy: 0.6512993269496494
          entropy_coeff: 0.009999999999999998
          kl: 0.13977637838877757
          policy_loss: -0.09137499406933784
          total_loss: -0.06868764023400015
          vf_explained_var: 0.7935608625411987
          vf_loss: 0.029128375887456866
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
  iterations_since_restore: 427
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,427,44995.4,427000,9.98,10,9,35.67




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-07_02-44-22
  done: false
  episode_len_mean: 47.06
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.91
  episode_reward_min: 5.0
  episodes_this_iter: 13
  episodes_total: 2721
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007723457404137598
          cur_lr: 5.000000000000001e-05
          entropy: 0.9974282357427809
          entropy_coeff: 0.009999999999999998
          kl: 0.07274798116289899
          policy_loss: 0.12897009290754796
          total_loss: 0.2542429584595892
          vf_explained_var: 0.9865066409111023
          vf_loss: 0.1351909663114283
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  iterations_since_restore: 428
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,428,45224.9,428000,9.91,10,5,47.06




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-07_02-48-47
  done: false
  episode_len_mean: 52.11
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 4.0
  episodes_this_iter: 14
  episodes_total: 2735
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0011585186106206396
          cur_lr: 5.000000000000001e-05
          entropy: 1.0137239426374436
          entropy_coeff: 0.009999999999999998
          kl: 0.020186166041570728
          policy_loss: 0.10869759817918141
          total_loss: 0.17629533641868167
          vf_explained_var: 0.9507269859313965
          vf_loss: 0.07771159114523066
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
  iterations_since_restore: 429
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,429,45490.3,429000,9.85,10,4,52.11




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-07_02-57-14
  done: false
  episode_len_mean: 52.29
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.85
  episode_reward_min: 4.0
  episodes_this_iter: 26
  episodes_total: 2761
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0017377779159309594
          cur_lr: 5.000000000000001e-05
          entropy: 0.16389611545536253
          entropy_coeff: 0.009999999999999998
          kl: 0.007476586071234505
          policy_loss: -0.1611191901895735
          total_loss: -0.1585328015188376
          vf_explained_var: 0.9991185665130615
          vf_loss: 0.004212356268221306
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000
  iterations_since_restore: 430
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,430,45997.6,430000,9.85,10,4,52.29




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-07_03-00-39
  done: false
  episode_len_mean: 59.11
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.82
  episode_reward_min: 4.0
  episodes_this_iter: 11
  episodes_total: 2772
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0017377779159309594
          cur_lr: 5.000000000000001e-05
          entropy: 0.77193650106589
          entropy_coeff: 0.009999999999999998
          kl: 0.16798822817821943
          policy_loss: 0.18715107730693287
          total_loss: 0.199093672964308
          vf_explained_var: -0.06679344922304153
          vf_loss: 0.019370034797531036
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000
  iterations_since_restore: 431
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,431,46202.6,431000,9.82,10,4,59.11




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-07_03-06-20
  done: false
  episode_len_mean: 62.78
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.78
  episode_reward_min: 4.0
  episodes_this_iter: 18
  episodes_total: 2790
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002606666873896439
          cur_lr: 5.000000000000001e-05
          entropy: 0.8222079628043705
          entropy_coeff: 0.009999999999999998
          kl: 0.0286963962603464
          policy_loss: 0.11056970722145504
          total_loss: 0.16624281737539504
          vf_explained_var: 0.9919306635856628
          vf_loss: 0.06382039058953523
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
  iterations_since_restore: 432
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,432,46543.4,432000,9.78,10,4,62.78




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-07_03-15-05
  done: false
  episode_len_mean: 51.5
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.87
  episode_reward_min: 4.0
  episodes_this_iter: 27
  episodes_total: 2817
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0039100003108446585
          cur_lr: 5.000000000000001e-05
          entropy: 0.1773126459783978
          entropy_coeff: 0.009999999999999998
          kl: 0.011385215841681933
          policy_loss: -0.03959035393264559
          total_loss: -0.03451693302227391
          vf_explained_var: 0.9988584518432617
          vf_loss: 0.0068020303086895086
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000
  iterations_since_restore: 433
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,433,47067.7,433000,9.87,10,4,51.5




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-07_03-24-09
  done: false
  episode_len_mean: 46.4
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.93
  episode_reward_min: 6.0
  episodes_this_iter: 28
  episodes_total: 2845
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0039100003108446585
          cur_lr: 5.000000000000001e-05
          entropy: 0.15225693467590545
          entropy_coeff: 0.009999999999999998
          kl: 0.0027889614911111712
          policy_loss: -0.04199977144598961
          total_loss: -0.0410387870338228
          vf_explained_var: 0.999521017074585
          vf_loss: 0.002472648777378102
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  iterations_since_restore: 434
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,434,47611.9,434000,9.93,10,6,46.4




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-07_03-29-34
  done: false
  episode_len_mean: 43.36
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.93
  episode_reward_min: 6.0
  episodes_this_iter: 17
  episodes_total: 2862
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019550001554223292
          cur_lr: 5.000000000000001e-05
          entropy: 0.6993767600920465
          entropy_coeff: 0.009999999999999998
          kl: 0.013104048463527463
          policy_loss: 0.04864727904399236
          total_loss: 0.08696574014094141
          vf_explained_var: 0.32974234223365784
          vf_loss: 0.04528660679029094
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
  iterations_since_restore: 435
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,435,47936.8,435000,9.93,10,6,43.36




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-07_03-34-41
  done: false
  episode_len_mean: 43.65
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.94
  episode_reward_min: 7.0
  episodes_this_iter: 16
  episodes_total: 2878
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019550001554223292
          cur_lr: 5.000000000000001e-05
          entropy: 0.7426646189557181
          entropy_coeff: 0.009999999999999998
          kl: 0.01646014812855207
          policy_loss: -0.09749362481137117
          total_loss: -0.08406951849659285
          vf_explained_var: 0.887431263923645
          vf_loss: 0.02081857088746296
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  iterations_since_restore: 436
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,436,48244.5,436000,9.94,10,7,43.65




Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-07_03-42-53
  done: false
  episode_len_mean: 45.24
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.89
  episode_reward_min: 5.0
  episodes_this_iter: 25
  episodes_total: 2903
  experiment_id: 49db762336a445169df010321c0a100e
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019550001554223292
          cur_lr: 5.000000000000001e-05
          entropy: 0.3731069521771537
          entropy_coeff: 0.009999999999999998
          kl: 0.011511400518838302
          policy_loss: 0.02085147549708684
          total_loss: 0.05403999967707528
          vf_explained_var: 0.996947169303894
          vf_loss: 0.036897092731669544
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  iterations_since_restore: 437
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,RUNNING,192.168.3.5:175,437,48736.1,437000,9.89,10,5,45.24


2021-10-07 03:53:30,240	ERROR trial_runner.py:773 -- Trial PPO_my_env_21374_00000: Error processing event.
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 739, in _process_trial
    results = self.trial_executor.fetch_result(trial)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 746, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/_private/client_mode_hook.py", line 82, in wrapper
    return func(*args, **kwargs)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/worker.py", line 1621, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(timeout): [36mray::PPO.train_buffered()[39m (pid=175, ip=192.168.3.5, repr=PPO)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trainable.py", line 178, in train_

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Result for PPO_my_env_21374_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-07_03-42-53
  done: false
  episode_len_mean: 45.24
  episode_media: {}
  episode_reward_max: 10.0
  episode_reward_mean: 9.89
  episode_reward_min: 5.0
  episodes_this_iter: 25
  episodes_total: 2903
  experiment_id: 49db762336a445169df010321c0a100e
  experiment_tag: '0'
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019550001554223292
          cur_lr: 5.000000000000001e-05
          entropy: 0.3731069521771537
          entropy_coeff: 0.009999999999999998
          kl: 0.011511400518838302
          policy_loss: 0.02085147549708684
          total_loss: 0.05403999967707528
          vf_explained_var: 0.996947169303894
          vf_loss: 0.036897092731669544
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  iterations_sinc

0,1
agent_timesteps_total,437000.0
episode_len_mean,45.24
episode_reward_max,10.0
episode_reward_mean,9.89
episode_reward_min,5.0
episodes_this_iter,25.0
episodes_total,2903.0
info/learner/default_policy/learner_stats/cur_kl_coeff,0.00196
info/learner/default_policy/learner_stats/cur_lr,5e-05
info/learner/default_policy/learner_stats/entropy,0.37311


0,1
agent_timesteps_total,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
episode_len_mean,█▇▆▆▆▅▅▅▅▅▅▅▅▅▄▄▄▄▄▅▅▅▅▅▄▄▄▄▂▂▂▁▁▁▁▁▁▁▁▁
episode_reward_max,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▆▆▆▆████████████████
episode_reward_mean,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▃▃▃▄▃▅▇▇█████████
episode_reward_min,▆▆▆▆▆▆▆▆▆▆▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▃▁▁▃▆▆▇▆▇▆▇▇▇█
episodes_this_iter,▁▁▁▂▁▁▂▁▁▁▁▂▁▂▂▂▂▁▁▁▁▂▂▂▁▂▂▁▃▇▆▄▇▄▅▅▁▆█▅
episodes_total,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▅▅▆▆▇▇█
info/learner/default_policy/learner_stats/cur_kl_coeff,▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▂▃▃▃▃▃▅▄█▂▂▁▁▁▁▁▁▁▁▁
info/learner/default_policy/learner_stats/cur_lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
info/learner/default_policy/learner_stats/entropy,▄▆█▇▇▇▇█▇▇▇▅▅▇▇▆▆▅▆▅▆▆▅▅▅▅▄▅▄▂▂▄▁▅▂▃▆▂▁▃




Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,ERROR,,437,48736.1,437000,9.89,10,5,45.24

Trial name,# failures,error file
PPO_my_env_21374_00000,1,/root/ray_results/PPO_2021-10-06_14-10-15/PPO_my_env_21374_00000_0_2021-10-06_14-10-16/error.txt


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_21374_00000,ERROR,,437,48736.1,437000,9.89,10,5,45.24

Trial name,# failures,error file
PPO_my_env_21374_00000,1,/root/ray_results/PPO_2021-10-06_14-10-15/PPO_my_env_21374_00000_0_2021-10-06_14-10-16/error.txt


[2m[36m(pid=176)[0m Attempted to send kill command to minecraft process and failed with exception timed out


TuneError: ('Trials did not complete', [PPO_my_env_21374_00000])