In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder: six stride-2 convolutions with ELU, then flatten.

    Every convolution uses ``kernel_size=2, stride=2, padding=0``, so each stage
    halves the spatial resolution. Channels progress
    3 -> 32 -> 32 -> 64 -> 128 -> 256 -> 512, and the final feature map is
    flattened to ``(batch, 512 * H_out * W_out)``.
    """

    def __init__(self):
        super().__init__()

        # Consecutive pairs of this tuple are the (in, out) channels of each conv.
        channel_plan = (3, 32, 32, 64, 128, 256, 512)

        stages = []
        for in_ch, out_ch in zip(channel_plan[:-1], channel_plan[1:]):
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=2, stride=2, padding=0))
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # The attribute name and layer order determine the state_dict keys
        # (cnn.0.weight, cnn.2.weight, ...), so both are kept as-is.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Encode a batch of channel-first images into flat feature vectors."""
        encoded = self.cnn(x)
        return encoded

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib custom Torch model: pretrained CNN encoder, MLP trunk, policy and value heads.

    The encoder weights are loaded from disk and the encoder is only ever run
    under ``torch.no_grad()``, so no gradients flow into it during training;
    only the MLP trunk and the two heads receive gradients.

    Args:
        obs_space: Observation space handed over by RLlib.
        action_space: Action space; ``action_space.n`` sizes the policy head,
            so a discrete space is expected.
        num_outputs: Number of outputs requested by RLlib (forwarded to
            ``TorchModelV2``).
        model_config: RLlib model config dict (forwarded to ``TorchModelV2``).
        name: Model name (forwarded to ``TorchModelV2``).
        encoder_weights_path: Location of the pretrained ``VisualEncoder``
            state dict. Defaults to the original hard-coded path; can be
            overridden through the ``custom_model_config`` entry of the trainer
            config, whose items are passed to this constructor as kwargs.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name,
                 encoder_weights_path="/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth"):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512  # input width of the MLP; must match the encoder's flattened output

        self.encoder = VisualEncoder()
        # Load onto CPU first so the checkpoint can be read on machines without a GPU.
        self.encoder.load_state_dict(
            torch.load(encoder_weights_path, map_location=torch.device('cpu'))
        )
        self.mlp = nn.Sequential(
            nn.Linear(features_dim, 128),
            nn.ELU(),
            nn.Linear(128, 128),
            nn.ELU(),
            nn.Linear(128, 128),
            nn.ELU(),
            nn.Linear(128, 128),
            nn.ELU(),
            nn.Linear(128, 128),
            nn.ELU(),
        )
        self.action_head = nn.Linear(128, action_space.n)
        self.value_head = nn.Linear(128, 1)
        # Value estimate of the most recent forward() call; read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.mlp.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate for the batch.

        Args:
            input_dict: Must contain ``'obs'``, a rank-4 tensor that is permuted
                from channel-last to channel-first (assumes a
                (batch, height, width, 3) image with 0-255 pixel values —
                TODO confirm against the env wrappers).
            state: Recurrent state; returned unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(logits, state)`` where ``logits`` is the policy head output.
        """
        # Tensor.cuda() returns a copy rather than moving in place, so the
        # original bare `obs.cuda()` was a no-op. Move the observation to
        # wherever the model parameters actually live instead; this also stays
        # correct if the model is later relocated to another device.
        device = next(self.parameters()).device
        obs = input_dict['obs'].to(device)
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        # The pretrained encoder is used as a fixed feature extractor.
        with torch.no_grad():
            features = self.encoder(obs)
        features = self.mlp(features)
        action = self.action_head(features)
        # (batch, 1) -> (batch,)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimates computed by the latest ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the name the trainer config refers to via
# "custom_model": "my_torch_model".
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os

# Select the GPU through environment variables: order devices by PCI bus id
# and expose only device "0" to CUDA.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "0",
})

def env_creator(env_config):
    """Build the wrapped IGLU builder environment used for training.

    Args:
        env_config: Optional dict-like settings. Recognised keys:
            ``"max_steps"`` (default ``1000``) — forwarded to ``gym.make``;
            ``"task_preset"`` (default ``['C32']``) — forwarded to
            ``TaskSet(preset=...)``.
            Missing keys, an empty mapping or ``None`` reproduce the original
            hard-coded setup.

    Returns:
        The environment wrapped, in order, by ``PovOnlyWrapper``,
        ``SelectAndPlace`` and ``Discretization`` with the ``'human-level'``
        flat action space.
    """
    settings = env_config or {}
    max_steps = settings.get("max_steps", 1000)
    task_preset = settings.get("task_preset", ['C32'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    return env

from ray.tune.registry import register_env
# Make env_creator available under the name "my_env", which the trainer
# config selects through its "env" entry.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Launch PPO training through Tune with the custom model and environment
# registered above; results are reported to Weights & Biases via WandbLogger.
# NOTE(review): no `stop` criterion is passed — confirm that running until the
# cell is interrupted manually is intended.
analysis = tune.run(
    PPOTrainer,
    config={
        # Environment and framework.
        "env": "my_env",
        "framework": "torch",
        # Resources.
        "num_gpus": 1,
        "num_workers": 1,
        # PPO hyperparameters.
        "sgd_minibatch_size": 256,
        "clip_param": 0.2,
        "entropy_coeff": 0.01,
        "lambda": 0.95,
        "train_batch_size": 1000,
        "model": {
            # Custom model registered as "my_torch_model".
            "custom_model": "my_torch_model",
            # Extra kwargs passed to the model's constructor.
            "custom_model_config": {},
        },
        "logger_config": {
            "wandb": {
                "project": "IGLU-Minecraft",
                "name": "PPO C32 pretrained (AngelaCNN + MLP) (3 noops after placement)",
            },
        },
    },
    loggers=[WandbLogger],
)

2021-10-09 14:51:38,435	INFO wandb.py:170 -- Already logged into W&B.
2021-10-09 14:51:38,449	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_67f57_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=202262)[0m 2021-10-09 14:51:41,877	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=202262)[0m 2021-10-09 14:51:41,877	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-09_14-53-07
  done: false
  episode_len_mean: 424.0
  episode_media: {}
  episode_reward_max: -7.0
  episode_reward_mean: -8.5
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.880436126391093
          entropy_coeff: 0.009999999999999998
          kl: 0.004079058113112211
          policy_loss: 0.03850563491384188
          total_loss: 0.6827736003531351
          vf_explained_var: 0.07799533754587173
          vf_loss: 0.672256518734826
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,1,80.3045,1000,-8.5,-7,-10,424


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-09_14-53-29
  done: false
  episode_len_mean: 412.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -6.25
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.875970220565796
          entropy_coeff: 0.009999999999999998
          kl: 0.0027537142600012584
          policy_loss: 0.052672796448071796
          total_loss: 0.3165993462006251
          vf_explained_var: 0.139669731259346
          vf_loss: 0.29241087711416186
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,2,101.5,2000,-6.25,0,-10,412.25


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-09_14-53-49
  done: false
  episode_len_mean: 413.42857142857144
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -3.5714285714285716
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 7
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.8746728261311847
          entropy_coeff: 0.009999999999999998
          kl: 0.004421391519707324
          policy_loss: -0.1367511188818349
          total_loss: -0.14056452131933636
          vf_explained_var: 0.29280728101730347
          vf_loss: 0.02471225268414451
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,3,121.618,3000,-3.57143,0,-10,413.429


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-09_14-54-09
  done: false
  episode_len_mean: 415.44444444444446
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -2.7777777777777777
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 2.8570196363661022
          entropy_coeff: 0.009999999999999998
          kl: 0.00488493117107459
          policy_loss: -0.10939110103580686
          total_loss: -0.12975025177001953
          vf_explained_var: 0.5112901329994202
          vf_loss: 0.008088923230146369
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,4,141.721,4000,-2.77778,0,-10,415.444


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-09_14-54-27
  done: false
  episode_len_mean: 416.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -2.0833333333333335
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 12
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.8502304368548925
          entropy_coeff: 0.009999999999999998
          kl: 0.003543588223740003
          policy_loss: 0.010007439967658785
          total_loss: -0.014476039715939098
          vf_explained_var: 0.174942746758461
          vf_loss: 0.003974528271161641
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,5,159.475,5000,-2.08333,0,-10,416.25


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-09_14-54-46
  done: false
  episode_len_mean: 414.57142857142856
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.7857142857142858
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 14
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.812695919142829
          entropy_coeff: 0.009999999999999998
          kl: 0.0058011812608874
          policy_loss: -0.025078250902394454
          total_loss: -0.05151641898685032
          vf_explained_var: -0.2602919936180115
          vf_loss: 0.0016525333960695813
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,6,178.674,6000,-1.78571,0,-10,414.571


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-09_14-55-03
  done: false
  episode_len_mean: 418.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.5625
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 16
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7877673864364625
          entropy_coeff: 0.009999999999999998
          kl: 0.007438848466373763
          policy_loss: -0.06832649923033185
          total_loss: -0.09399162381887435
          vf_explained_var: -0.09468455612659454
          vf_loss: 0.0021660564887699568
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,7,195.989,7000,-1.5625,0,-10,418.75


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-09_14-55-22
  done: false
  episode_len_mean: 417.57894736842104
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.3157894736842106
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 19
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.807117811838786
          entropy_coeff: 0.009999999999999998
          kl: 0.008930719075401801
          policy_loss: -0.008763201120826934
          total_loss: -0.0355679704911179
          vf_explained_var: 0.07574610412120819
          vf_loss: 0.0012105925015122112
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,8,214.873,8000,-1.31579,0,-10,417.579


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-09_14-55-40
  done: false
  episode_len_mean: 417.3809523809524
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.1904761904761905
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 21
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.797969272401598
          entropy_coeff: 0.009999999999999998
          kl: 0.007604839293667275
          policy_loss: -0.08543911029895147
          total_loss: -0.11236110279957454
          vf_explained_var: -0.024789465591311455
          vf_loss: 0.0010101681122452848
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,9,233.146,9000,-1.19048,0,-10,417.381


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-09_14-55-59
  done: false
  episode_len_mean: 416.4166666666667
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.0416666666666667
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 24
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.8071422550413345
          entropy_coeff: 0.009999999999999998
          kl: 0.007195428385372674
          policy_loss: 0.0961694684293535
          total_loss: 0.06872350097530418
          vf_explained_var: -0.5981001257896423
          vf_loss: 0.0005804844284688846
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,10,251.533,10000,-1.04167,0,-10,416.417


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-09_14-56-17
  done: false
  episode_len_mean: 416.38461538461536
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.9615384615384616
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 26
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.8235095103581744
          entropy_coeff: 0.009999999999999998
          kl: 0.005562907965229572
          policy_loss: -0.013870128782259093
          total_loss: -0.041662323930197295
          vf_explained_var: -0.582188069820404
          vf_loss: 0.0004081333272754111
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,11,269.941,11000,-0.961538,0,-10,416.385


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-09_14-56-35
  done: false
  episode_len_mean: 414.07142857142856
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8928571428571429
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 28
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.83771960205502
          entropy_coeff: 0.009999999999999998
          kl: 0.0048517568324885785
          policy_loss: -0.055631975498464374
          total_loss: -0.08358178801006741
          vf_explained_var: -0.37071260809898376
          vf_loss: 0.0003970581467405686
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,12,287.707,12000,-0.892857,0,-10,414.071




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-09_14-57-11
  done: false
  episode_len_mean: 412.4516129032258
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8064516129032258
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 31
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.812476791275872
          entropy_coeff: 0.009999999999999998
          kl: 0.009241330296108647
          policy_loss: -0.015302751378880607
          total_loss: -0.043122512764400905
          vf_explained_var: -0.9126608371734619
          vf_loss: 0.0002761285365623836
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,13,324.055,13000,-0.806452,0,-10,412.452


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-09_14-57-32
  done: false
  episode_len_mean: 409.1764705882353
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7352941176470589
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 34
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.7884134186638727
          entropy_coeff: 0.009999999999999998
          kl: 0.008132114732154132
          policy_loss: -0.06150064818147156
          total_loss: -0.0891132962786489
          vf_explained_var: -0.7315559387207031
          vf_loss: 0.0002460756238886259
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,14,344.175,14000,-0.735294,0,-10,409.176


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-09_14-57-54
  done: false
  episode_len_mean: 404.4864864864865
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6756756756756757
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 37
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.740892590416802
          entropy_coeff: 0.009999999999999998
          kl: 0.007131264831832339
          policy_loss: 0.04433749947283003
          total_loss: 0.01726107680135303
          vf_explained_var: -0.539760410785675
          vf_loss: 0.00031021600564902957
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,15,366.738,15000,-0.675676,0,-10,404.486


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-09_14-58-16
  done: false
  episode_len_mean: 402.02564102564105
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6410256410256411
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 39
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.7316100226508246
          entropy_coeff: 0.009999999999999998
          kl: 0.008605314044495296
          policy_loss: 0.04419074174430635
          total_loss: 0.01727470623122321
          vf_explained_var: -0.37534114718437195
          vf_loss: 0.00037317395294343843
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,16,388.914,16000,-0.641026,0,-10,402.026


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-09_14-58-38
  done: false
  episode_len_mean: 399.6190476190476
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5952380952380952
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 42
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.7270883825090197
          entropy_coeff: 0.009999999999999998
          kl: 0.007140369169875971
          policy_loss: 0.03136580172512266
          total_loss: 0.004654419091012742
          vf_explained_var: 0.1879970133304596
          vf_loss: 0.0005371882479974172
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,17,410.961,17000,-0.595238,0,-10,399.619


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-09_14-59-01
  done: false
  episode_len_mean: 397.46666666666664
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5555555555555556
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 45
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.7432665189107257
          entropy_coeff: 0.009999999999999998
          kl: 0.007594163663337896
          policy_loss: -0.0014285477499167123
          total_loss: -0.0285578191280365
          vf_explained_var: -0.8837210536003113
          vf_loss: 0.0002796622691676021
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,18,433.049,18000,-0.555556,0,-10,397.467


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-09_14-59-22
  done: false
  episode_len_mean: 396.1489361702128
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5319148936170213
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 47
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.747673291630215
          entropy_coeff: 0.009999999999999998
          kl: 0.01125860078927467
          policy_loss: 0.02531888335943222
          total_loss: -0.0015795002174046305
          vf_explained_var: -0.18570055067539215
          vf_loss: 0.0005431667024418453
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,19,454.327,19000,-0.531915,0,-10,396.149


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-09_14-59-44
  done: false
  episode_len_mean: 394.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 50
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.766861006948683
          entropy_coeff: 0.009999999999999998
          kl: 0.011424539744888424
          policy_loss: 0.042797945274247066
          total_loss: 0.13151526716020373
          vf_explained_var: -0.5058658719062805
          vf_loss: 0.11635023388080298
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,20,475.998,20000,-0.6,0,-10,394.92


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-09_15-00-06
  done: false
  episode_len_mean: 393.6981132075472
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5660377358490566
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 53
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.7506067593892416
          entropy_coeff: 0.009999999999999998
          kl: 0.0073867216094372625
          policy_loss: 0.03165206834673882
          total_loss: 0.007483401811785168
          vf_explained_var: -0.5710800290107727
          vf_loss: 0.0033143184807461995
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,21,498.194,21000,-0.566038,0,-10,393.698


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-09_15-00-27
  done: false
  episode_len_mean: 392.26785714285717
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5357142857142857
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 56
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.752124145295885
          entropy_coeff: 0.009999999999999998
          kl: 0.006560099821742415
          policy_loss: -0.018064670885602634
          total_loss: -0.04396636502610313
          vf_explained_var: -0.16852886974811554
          vf_loss: 0.0015990456786110169
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,22,519.562,22000,-0.535714,0,-10,392.268


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-09_15-00-48
  done: false
  episode_len_mean: 391.87931034482756
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5172413793103449
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 58
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.767403973473443
          entropy_coeff: 0.009999999999999998
          kl: 0.007218802583168424
          policy_loss: 0.060201653382844396
          total_loss: 0.03341350058714549
          vf_explained_var: -0.6969187259674072
          vf_loss: 0.0008633287455369201
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,23,540.437,23000,-0.517241,0,-10,391.879




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-09_15-01-29
  done: false
  episode_len_mean: 390.5245901639344
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4918032786885246
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 61
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.740456872516208
          entropy_coeff: 0.009999999999999998
          kl: 0.009922235102946115
          policy_loss: -0.029735145252197982
          total_loss: -0.050676388666033745
          vf_explained_var: -0.4238753318786621
          vf_loss: 0.006432316200031588
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,24,580.967,24000,-0.491803,0,-10,390.525


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-09_15-01-50
  done: false
  episode_len_mean: 389.59375
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.46875
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 64
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.7847859064737954
          entropy_coeff: 0.009999999999999998
          kl: 0.007692616565414302
          policy_loss: -0.02243415390451749
          total_loss: -0.049794700576199426
          vf_explained_var: -0.19469104707241058
          vf_loss: 0.000463271867192816
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,25,602.439,25000,-0.46875,0,-10,389.594


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-09_15-02-11
  done: false
  episode_len_mean: 389.3181818181818
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.45454545454545453
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 66
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.735106462902493
          entropy_coeff: 0.009999999999999998
          kl: 0.009047288283071648
          policy_loss: -0.027194186713960435
          total_loss: -0.05413402964671453
          vf_explained_var: -0.37447962164878845
          vf_loss: 0.00038295090117672874
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,26,623.057,26000,-0.454545,0,-10,389.318


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-09_15-02-32
  done: false
  episode_len_mean: 389.40579710144925
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.43478260869565216
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 69
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.7030738645129735
          entropy_coeff: 0.009999999999999998
          kl: 0.009617463435051574
          policy_loss: -0.039091720494131246
          total_loss: -0.06551256372282903
          vf_explained_var: -0.7427729368209839
          vf_loss: 0.0005798376753874537
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,27,644.593,27000,-0.434783,0,-10,389.406


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-09_15-02-52
  done: false
  episode_len_mean: 389.59154929577466
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4225352112676056
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 71
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.7095376518037586
          entropy_coeff: 0.009999999999999998
          kl: 0.008672632755561733
          policy_loss: -0.0034881720112429726
          total_loss: -0.030031785948408975
          vf_explained_var: -1.0
          vf_loss: 0.0005246591699283777
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,28,664.052,28000,-0.422535,0,-10,389.592


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-09_15-03-12
  done: false
  episode_len_mean: 390.56756756756755
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.40540540540540543
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 74
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.675137554274665
          entropy_coeff: 0.009999999999999998
          kl: 0.008410771023606906
          policy_loss: 0.0076065704123013545
          total_loss: -0.018484293959207004
          vf_explained_var: 0.17392580211162567
          vf_loss: 0.0006342246727500525
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,29,684.439,29000,-0.405405,0,-10,390.568


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-09_15-03-31
  done: false
  episode_len_mean: 391.6842105263158
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.39473684210526316
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 76
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.6804011556837293
          entropy_coeff: 0.009999999999999998
          kl: 0.00912146905846015
          policy_loss: -0.0026126065601905185
          total_loss: -0.029108975165420107
          vf_explained_var: -0.09562104195356369
          vf_loss: 0.0002791376622755908
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,30,702.75,30000,-0.394737,0,-10,391.684


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-09_15-03-49
  done: false
  episode_len_mean: 393.0769230769231
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.38461538461538464
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 78
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.600885017712911
          entropy_coeff: 0.009999999999999998
          kl: 0.012501135565939675
          policy_loss: -0.05911074603597323
          total_loss: -0.08481464948919085
          vf_explained_var: -0.4906626343727112
          vf_loss: 0.0002658796223436689
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,31,721.438,31000,-0.384615,0,-10,393.077


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-09_15-04-07
  done: false
  episode_len_mean: 394.58024691358025
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.37037037037037035
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 81
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.625839363204108
          entropy_coeff: 0.009999999999999998
          kl: 0.011962924006429827
          policy_loss: -0.02648068124221431
          total_loss: -0.052480397497614226
          vf_explained_var: -0.9319910407066345
          vf_loss: 0.00022129082620570746
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,32,739.216,32000,-0.37037,0,-10,394.58


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-09_15-04-26
  done: false
  episode_len_mean: 395.2168674698795
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3614457831325301
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 83
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.6739298264185587
          entropy_coeff: 0.009999999999999998
          kl: 0.00970005384822152
          policy_loss: -0.03814136665314436
          total_loss: -0.06442279605608847
          vf_explained_var: -0.5449137687683105
          vf_loss: 0.0004275552344754235
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,33,758.304,33000,-0.361446,0,-10,395.217


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-09_15-04-44
  done: false
  episode_len_mean: 395.94117647058823
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.35294117647058826
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 85
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.5921804507573447
          entropy_coeff: 0.009999999999999998
          kl: 0.010409862298858904
          policy_loss: -0.017092528608110218
          total_loss: -0.042759988953669864
          vf_explained_var: -0.44365450739860535
          vf_loss: 0.00022181553609294092
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,34,775.705,34000,-0.352941,0,-10,395.941


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-09_15-05-03
  done: false
  episode_len_mean: 396.45454545454544
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3409090909090909
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 88
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.610579138331943
          entropy_coeff: 0.009999999999999998
          kl: 0.008524496946236977
          policy_loss: -0.07018508745564354
          total_loss: -0.09603870428270764
          vf_explained_var: -0.6862525343894958
          vf_loss: 0.00022553145293689644
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,35,794.954,35000,-0.340909,0,-10,396.455




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-09_15-05-38
  done: false
  episode_len_mean: 396.2888888888889
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 90
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.5691409720314873
          entropy_coeff: 0.009999999999999998
          kl: 0.010456974825015121
          policy_loss: -0.004042189175056087
          total_loss: -0.029506944782204097
          vf_explained_var: 0.19727815687656403
          vf_loss: 0.00019397541424647595
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,36,829.716,36000,-0.333333,0,-10,396.289


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-09_15-05-55
  done: false
  episode_len_mean: 397.1847826086956
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.32608695652173914
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 92
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.5853434483210247
          entropy_coeff: 0.009999999999999998
          kl: 0.009228429939026563
          policy_loss: 0.0009428006493382983
          total_loss: -0.024618245164553323
          vf_explained_var: -0.7402529120445251
          vf_loss: 0.00026354858297660634
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,37,847.115,37000,-0.326087,0,-10,397.185


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-09_15-06-10
  done: false
  episode_len_mean: 399.12631578947367
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3157894736842105
  episode_reward_min: -10.0
  episodes_this_iter: 3
  episodes_total: 95
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.6117271131939357
          entropy_coeff: 0.009999999999999998
          kl: 0.008926623224501481
          policy_loss: -0.06115295332339075
          total_loss: -0.08695888167454137
          vf_explained_var: -0.8222525715827942
          vf_loss: 0.0002834466031345073
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,38,862.42,38000,-0.315789,0,-10,399.126


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-09_15-06-26
  done: false
  episode_len_mean: 400.5773195876289
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.30927835051546393
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 97
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.560220138231913
          entropy_coeff: 0.009999999999999998
          kl: 0.011391810410234725
          policy_loss: -0.04460733286622498
          total_loss: -0.06951179318130016
          vf_explained_var: -0.52155601978302
          vf_loss: 0.0006621407713383734
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,39,878.421,39000,-0.309278,0,-10,400.577


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-09_15-06-44
  done: false
  episode_len_mean: 401.4141414141414
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.30303030303030304
  episode_reward_min: -10.0
  episodes_this_iter: 2
  episodes_total: 99
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.543030203713311
          entropy_coeff: 0.009999999999999998
          kl: 0.009256789285862588
          policy_loss: -0.028998024264971414
          total_loss: -0.054068875561157866
          vf_explained_var: 0.05609817057847977
          vf_loss: 0.00033052176997646005
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,40,895.625,40000,-0.30303,0,-10,401.414


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-09_15-07-03
  done: false
  episode_len_mean: 402.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2
  episode_reward_min: -8.0
  episodes_this_iter: 2
  episodes_total: 101
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.4645522091123793
          entropy_coeff: 0.009999999999999998
          kl: 0.007379306928377361
          policy_loss: -0.10928868498239253
          total_loss: -0.1337312719060315
          vf_explained_var: 0.4714851379394531
          vf_loss: 0.00017987502748534706
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,41,914.898,41000,-0.2,0,-8,402.12


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-09_15-07-23
  done: false
  episode_len_mean: 403.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 103
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.4417466825909084
          entropy_coeff: 0.009999999999999998
          kl: 0.009368467625314765
          policy_loss: 0.1549471513264709
          total_loss: 0.1307942472398281
          vf_explained_var: -0.6844379901885986
          vf_loss: 0.00023528710407845211
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,42,934.494,42000,-0.05,0,-5,403.61


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-09_15-07-41
  done: false
  episode_len_mean: 404.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 106
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.4648349205652873
          entropy_coeff: 0.009999999999999998
          kl: 0.010184675331895246
          policy_loss: 0.005063496612840229
          total_loss: -0.01929294860197438
          vf_explained_var: -0.5766132473945618
          vf_loss: 0.00026007627796692154
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,43,953.229,43000,-0.05,0,-5,404.44


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-09_15-08-01
  done: false
  episode_len_mean: 404.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 108
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.4350251065360173
          entropy_coeff: 0.009999999999999998
          kl: 0.009678510861413327
          policy_loss: -0.06777897030115128
          total_loss: -0.09183820059729947
          vf_explained_var: -0.48909294605255127
          vf_loss: 0.0002607744219454212
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 44
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,44,973.085,44000,-0.05,0,-5,404.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-09_15-08-22
  done: false
  episode_len_mean: 404.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 110
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.4407159778806897
          entropy_coeff: 0.009999999999999998
          kl: 0.008025338339744377
          policy_loss: -0.002143677406840854
          total_loss: -0.02620335966348648
          vf_explained_var: -0.883249044418335
          vf_loss: 0.00032239296787237335
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,45,993.452,45000,-0.05,0,-5,404.59


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-09_15-08-40
  done: false
  episode_len_mean: 404.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 113
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.509034609794617
          entropy_coeff: 0.009999999999999998
          kl: 0.013224536997784645
          policy_loss: -0.05826425130168597
          total_loss: -0.08275274624013239
          vf_explained_var: -0.9794014692306519
          vf_loss: 0.0005605244346144092
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 46
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,46,1012.21,46000,-0.05,0,-5,404.96


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-09_15-09-00
  done: false
  episode_len_mean: 405.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 115
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.464280676841736
          entropy_coeff: 0.009999999999999998
          kl: 0.008412976091808222
          policy_loss: -0.05256644818517897
          total_loss: -0.07686393136779467
          vf_explained_var: -0.683610737323761
          vf_loss: 0.00031903141126450563
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,47,1031.6,47000,-0.05,0,-5,405.54


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-09_15-09-18
  done: false
  episode_len_mean: 405.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 117
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.4705659680896335
          entropy_coeff: 0.009999999999999998
          kl: 0.009018507567389342
          policy_loss: -0.07552649525718556
          total_loss: -0.09990826495405701
          vf_explained_var: -0.7982491254806519
          vf_loss: 0.0002957066276899746
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,48,1050.28,48000,-0.05,0,-5,405.53




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-09_15-09-55
  done: false
  episode_len_mean: 406.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 120
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.3414367304907904
          entropy_coeff: 0.009999999999999998
          kl: 0.015159994577862143
          policy_loss: -0.030672432233889897
          total_loss: -0.05375654813316133
          vf_explained_var: 0.07882977277040482
          vf_loss: 0.00028287810241130906
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,49,1086.97,49000,-0.05,0,-5,406.36


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-09_15-10-14
  done: false
  episode_len_mean: 406.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 122
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.4732459439171683
          entropy_coeff: 0.009999999999999998
          kl: 0.012000583473084988
          policy_loss: -0.04025476121654113
          total_loss: -0.06471049425502619
          vf_explained_var: -0.12225598096847534
          vf_loss: 0.0002392240292263321
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,50,1106.28,50000,-0.05,0,-5,406.57


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-09_15-10-33
  done: false
  episode_len_mean: 407.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 124
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.548745033476088
          entropy_coeff: 0.009999999999999998
          kl: 0.011176474038571834
          policy_loss: -0.06997227987481489
          total_loss: -0.09518907190197044
          vf_explained_var: -1.0
          vf_loss: 0.00023573140060761943
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,51,1125,51000,-0.05,0,-5,407.78


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-09_15-10-53
  done: false
  episode_len_mean: 408.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 126
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.537495909796821
          entropy_coeff: 0.009999999999999998
          kl: 0.010339025065230465
          policy_loss: -0.11536694425675605
          total_loss: -0.14054883534295692
          vf_explained_var: -0.46721717715263367
          vf_loss: 0.00016075499297585338
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,52,1144.31,52000,-0.05,0,-5,408.68


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-09_15-11-13
  done: false
  episode_len_mean: 409.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 129
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.568503173192342
          entropy_coeff: 0.009999999999999998
          kl: 0.008678344828305734
          policy_loss: -0.05346477698120806
          total_loss: -0.07894170992076396
          vf_explained_var: -0.6834637522697449
          vf_loss: 0.00018097769162624092
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_restore: 53
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,53,1164.59,53000,-0.05,0,-5,409.42


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-09_15-11-34
  done: false
  episode_len_mean: 409.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 131
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.5898052242067124
          entropy_coeff: 0.009999999999999998
          kl: 0.011046766479427008
          policy_loss: -0.01696582076450189
          total_loss: -0.04254196733236313
          vf_explained_var: -0.9628962874412537
          vf_loss: 0.00028738560075806973
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 54
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,54,1186.19,54000,-0.05,0,-5,409.12


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-09_15-11-56
  done: false
  episode_len_mean: 409.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 134
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.565952028168572
          entropy_coeff: 0.009999999999999998
          kl: 0.013012808667153344
          policy_loss: -0.0630023223761883
          total_loss: -0.08847338776621554
          vf_explained_var: -0.8353697061538696
          vf_loss: 0.0001477896134828269
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 55
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,55,1208.21,55000,-0.05,0,-5,409.46


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-09_15-12-18
  done: false
  episode_len_mean: 410.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 136
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.568463924196031
          entropy_coeff: 0.009999999999999998
          kl: 0.008118602245237158
          policy_loss: -0.04163514744076464
          total_loss: -0.06713986458877723
          vf_explained_var: -0.7187998294830322
          vf_loss: 0.00015455209375229767
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_restore: 56
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,56,1229.92,56000,-0.05,0,-5,410.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-09_15-12-41
  done: false
  episode_len_mean: 411.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 139
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.44428149594201
          entropy_coeff: 0.009999999999999998
          kl: 0.011814540112029783
          policy_loss: -0.07932253811094496
          total_loss: -0.10358499478962686
          vf_explained_var: -0.8732729554176331
          vf_loss: 0.00014343679524447199
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 57
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,57,1252.43,57000,-0.05,0,-5,411.3


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-09_15-13-02
  done: false
  episode_len_mean: 411.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 2
  episodes_total: 141
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.450995087623596
          entropy_coeff: 0.009999999999999998
          kl: 0.013789148584357038
          policy_loss: -0.033108876335124174
          total_loss: -0.057358519256942804
          vf_explained_var: -0.9963732957839966
          vf_loss: 0.00021721665883281578
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,58,1273.49,58000,-0.05,0,-5,411.52


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-09_15-13-23
  done: false
  episode_len_mean: 413.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 144
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.440985533926222
          entropy_coeff: 0.009999999999999998
          kl: 0.014690368664478207
          policy_loss: 0.03679350276167194
          total_loss: 0.012578236766987376
          vf_explained_var: 0.12073888629674911
          vf_loss: 0.00014868082410733526
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,59,1294.69,59000,-0.05,0,-5,413.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-09_15-13-45
  done: false
  episode_len_mean: 413.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -5.0
  episodes_this_iter: 3
  episodes_total: 147
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.4882962067921954
          entropy_coeff: 0.009999999999999998
          kl: 0.00976045560474071
          policy_loss: -0.06795769615305794
          total_loss: -0.09262587701280912
          vf_explained_var: -1.0
          vf_loss: 0.00018427841261857086
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,60,1317.05,60000,-0.05,0,-5,413.58


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-09_15-14-09
  done: false
  episode_len_mean: 413.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 149
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.445694571071201
          entropy_coeff: 0.009999999999999998
          kl: 0.012380136403269364
          policy_loss: -0.022762408687008753
          total_loss: -0.04701974878294601
          vf_explained_var: -0.6646129488945007
          vf_loss: 0.00016092160335069315
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,61,1340.58,61000,0,0,0,413.31




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-09_15-14-48
  done: false
  episode_len_mean: 413.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 152
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.476668259832594
          entropy_coeff: 0.009999999999999998
          kl: 0.009733152845673863
          policy_loss: 0.05123751349747181
          total_loss: 0.02658142652362585
          vf_explained_var: -0.7113504409790039
          vf_loss: 8.017727060279058e-05
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_restore: 62
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,62,1379.61,62000,0,0,0,413.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-09_15-15-09
  done: false
  episode_len_mean: 413.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 155
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.4018918063905503
          entropy_coeff: 0.009999999999999998
          kl: 0.011113185592119863
          policy_loss: 0.019222861611180836
          total_loss: -0.00461765651901563
          vf_explained_var: -0.9632623195648193
          vf_loss: 0.00014367224175657612
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 63
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,63,1400.64,63000,0,0,0,413.87


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-09_15-15-30
  done: false
  episode_len_mean: 414.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 157
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.3947931740019057
          entropy_coeff: 0.009999999999999998
          kl: 0.007799091856864098
          policy_loss: -0.025943030334181254
          total_loss: -0.04977190949850612
          vf_explained_var: -0.25405943393707275
          vf_loss: 9.468218793497524e-05
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,64,1421.3,64000,0,0,0,414.37


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-09_15-15-51
  done: false
  episode_len_mean: 415.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 160
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.356024185816447
          entropy_coeff: 0.009999999999999998
          kl: 0.010662043698883478
          policy_loss: -0.16776170945829816
          total_loss: -0.19111866686079237
          vf_explained_var: -0.9624335169792175
          vf_loss: 0.00016996428716487975
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_restore: 65
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,65,1442.09,65000,0,0,0,415.02


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-09_15-16-11
  done: false
  episode_len_mean: 416.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 162
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.373092034127977
          entropy_coeff: 0.009999999999999998
          kl: 0.010722877058513101
          policy_loss: -0.052918620738718246
          total_loss: -0.07646425397445758
          vf_explained_var: -0.7299266457557678
          vf_loss: 0.00015177706102096838
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,66,1462.8,66000,0,0,0,416.09


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-09_15-16-31
  done: false
  episode_len_mean: 416.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 164
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.3141723367902967
          entropy_coeff: 0.009999999999999998
          kl: 0.010417282812420374
          policy_loss: -0.05574496856166257
          total_loss: -0.07869509605483876
          vf_explained_var: -0.9849169254302979
          vf_loss: 0.00015904367816498658
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_since_restore: 67
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,67,1482.32,67000,0,0,0,416.3


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-09_15-16-49
  done: false
  episode_len_mean: 417.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 167
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.2617956585354273
          entropy_coeff: 0.009999999999999998
          kl: 0.010373712015154999
          policy_loss: -0.011618935420281357
          total_loss: -0.033861441537737846
          vf_explained_var: -0.35834434628486633
          vf_loss: 0.00034303156583822175
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_since_restore: 68
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,68,1500.49,68000,0,0,0,417.93


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-09_15-17-08
  done: false
  episode_len_mean: 418.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 169
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.350656252437168
          entropy_coeff: 0.009999999999999998
          kl: 0.009669149609951688
          policy_loss: -0.00442705609732204
          total_loss: -0.027732924703094693
          vf_explained_var: -0.8002883791923523
          vf_loss: 0.00017047646577136928
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_restore: 69
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,69,1519.17,69000,0,0,0,418.55


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-09_15-17-28
  done: false
  episode_len_mean: 419.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 172
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.322822682062785
          entropy_coeff: 0.009999999999999998
          kl: 0.01229618591567632
          policy_loss: 0.0028223560088210635
          total_loss: -0.020190064857403437
          vf_explained_var: -0.15828213095664978
          vf_loss: 0.00017737925073662254
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 70
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,70,1539.06,70000,0,0,0,419.03


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-09_15-17-46
  done: false
  episode_len_mean: 419.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 174
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.2676279253429836
          entropy_coeff: 0.009999999999999998
          kl: 0.008206836681420013
          policy_loss: -0.1172503982981046
          total_loss: -0.1396757670574718
          vf_explained_var: -0.4591027498245239
          vf_loss: 0.0002252635423954214
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_restore: 71
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,71,1557.65,71000,0,0,0,419.22


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-09_15-18-06
  done: false
  episode_len_mean: 419.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 176
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.2047211541069878
          entropy_coeff: 0.009999999999999998
          kl: 0.010229904727048912
          policy_loss: 0.0034676404462920295
          total_loss: -0.018393161023656526
          vf_explained_var: -0.5247876644134521
          vf_loss: 0.00015444278566671224
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_restore: 72
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,72,1577.23,72000,0,0,0,419.28


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-09_15-18-24
  done: false
  episode_len_mean: 419.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 178
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.3613383611043295
          entropy_coeff: 0.009999999999999998
          kl: 0.013229968307831068
          policy_loss: -0.0364862362957663
          total_loss: -0.059746835670537415
          vf_explained_var: 0.20639580488204956
          vf_loss: 0.0003114413454669476
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_restore: 73
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,73,1595.11,73000,0,0,0,419.73




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-09_15-18-58
  done: false
  episode_len_mean: 419.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 181
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.227650725841522
          entropy_coeff: 0.009999999999999998
          kl: 0.013172459274642535
          policy_loss: 0.018484296484125987
          total_loss: -0.0033384895159138572
          vf_explained_var: -0.4088258445262909
          vf_loss: 0.0004125569647941221
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,74,1629.18,74000,0,0,0,419.48


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-09_15-19-16
  done: false
  episode_len_mean: 419.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 183
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.222700990570916
          entropy_coeff: 0.009999999999999998
          kl: 0.010573073047468403
          policy_loss: -0.03350950446393755
          total_loss: -0.055550455550352734
          vf_explained_var: -0.3208636939525604
          vf_loss: 0.00015301865730887383
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_restore: 75
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,75,1647.42,75000,0,0,0,419.88


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-09_15-19-34
  done: false
  episode_len_mean: 420.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 185
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.068968031141493
          entropy_coeff: 0.009999999999999998
          kl: 0.01313095026674826
          policy_loss: -0.05126176240543524
          total_loss: -0.0717457238998678
          vf_explained_var: -0.9452893733978271
          vf_loss: 0.0001646859015535382
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_restore: 76
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,76,1665.37,76000,0,0,0,420.64


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-09_15-19-52
  done: false
  episode_len_mean: 421.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 187
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.2922282377878824
          entropy_coeff: 0.009999999999999998
          kl: 0.007853721615853735
          policy_loss: -0.1362877584165997
          total_loss: -0.15896299655238788
          vf_explained_var: -0.5141886472702026
          vf_loss: 0.00022250121967065043
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_restore: 77
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,77,1683.24,77000,0,0,0,421.94


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-09_15-20-12
  done: false
  episode_len_mean: 421.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 190
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.1543350325690374
          entropy_coeff: 0.009999999999999998
          kl: 0.012730479458931162
          policy_loss: -0.020438182312581273
          total_loss: -0.041578258035911454
          vf_explained_var: -0.19125837087631226
          vf_loss: 0.0003634898181189783
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since_restore: 78
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,78,1703.09,78000,0,0,0,421.54


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-09_15-20-29
  done: false
  episode_len_mean: 420.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 192
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.144740276866489
          entropy_coeff: 0.009999999999999998
          kl: 0.012368283452986075
          policy_loss: 0.08496271040704516
          total_loss: 0.06399087210496267
          vf_explained_var: 0.06855780631303787
          vf_loss: 0.0004369158144982066
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_restore: 79
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,79,1720.27,79000,0,0,0,420.38


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-09_15-20-49
  done: false
  episode_len_mean: 419.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 195
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.131298944685194
          entropy_coeff: 0.009999999999999998
          kl: 0.009044370943518985
          policy_loss: -0.017834035803874333
          total_loss: -0.038912065492735966
          vf_explained_var: -0.39087867736816406
          vf_loss: 0.00020669826578038433
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,80,1740.5,80000,0,0,0,419.29


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-09_15-21-09
  done: false
  episode_len_mean: 416.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 198
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.092074059115516
          entropy_coeff: 0.009999999999999998
          kl: 0.011408008858639088
          policy_loss: -0.009047204587194655
          total_loss: -0.02970229900545544
          vf_explained_var: -0.6451937556266785
          vf_loss: 0.0002299951481594083
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_restore: 81
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,81,1760.18,81000,0,0,0,416.93


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-09_15-21-28
  done: false
  episode_len_mean: 416.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 200
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.20894040134218
          entropy_coeff: 0.009999999999999998
          kl: 0.012000200045933933
          policy_loss: -0.054567246552970676
          total_loss: -0.07632506241401037
          vf_explained_var: -0.6225420832633972
          vf_loss: 0.00029408343155713134
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_restore: 82
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,82,1779.45,82000,0,0,0,416.13


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-09_15-21-49
  done: false
  episode_len_mean: 412.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 203
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.145531921916538
          entropy_coeff: 0.009999999999999998
          kl: 0.01408960051274768
          policy_loss: -0.0038984853360388015
          total_loss: -0.0248043538381656
          vf_explained_var: -0.12973123788833618
          vf_loss: 0.000505420951715981
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore: 83
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,83,1800.63,83000,0,0,0,412.8


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-09_15-22-11
  done: false
  episode_len_mean: 410.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 206
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.1604588508605955
          entropy_coeff: 0.009999999999999998
          kl: 0.01253829248973309
          policy_loss: -0.02995159129301707
          total_loss: -0.05132724651032024
          vf_explained_var: -0.4248601794242859
          vf_loss: 0.00018975006185226247
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore: 84
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,84,1822.4,84000,0,0,0,410.28


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-09_15-22-34
  done: false
  episode_len_mean: 407.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 209
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.085181172688802
          entropy_coeff: 0.009999999999999998
          kl: 0.014703799319222425
          policy_loss: -0.022553058423929743
          total_loss: -0.04317752064930068
          vf_explained_var: -0.9895347356796265
          vf_loss: 0.00018139938264438469
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_restore: 85
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,85,1845.47,85000,0,0,0,407.3




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-09_15-23-12
  done: false
  episode_len_mean: 405.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 212
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.0610658393965826
          entropy_coeff: 0.009999999999999998
          kl: 0.014617790289179035
          policy_loss: -0.04428210761398077
          total_loss: -0.06463632869223754
          vf_explained_var: -0.5778048038482666
          vf_loss: 0.0002107569845166937
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_restore: 86
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,86,1883.21,86000,0,0,0,405.54


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-09_15-23-32
  done: false
  episode_len_mean: 403.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 214
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.1642228921254474
          entropy_coeff: 0.009999999999999998
          kl: 0.012809769908622126
          policy_loss: -0.029669082464857235
          total_loss: -0.05111959754592842
          vf_explained_var: -1.0
          vf_loss: 0.0001516846006981925
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_restore: 87
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,87,1903.34,87000,0,0,0,403.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-09_15-23-53
  done: false
  episode_len_mean: 400.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 217
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.1976837317148843
          entropy_coeff: 0.009999999999999998
          kl: 0.014452591019874422
          policy_loss: -0.026298945893843968
          total_loss: -0.04806816904909081
          vf_explained_var: -0.8758071660995483
          vf_loss: 0.00016244910447211523
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,88,1924.21,88000,0,0,0,400.96


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-09_15-24-13
  done: false
  episode_len_mean: 399.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 220
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.3218068467246162
          entropy_coeff: 0.009999999999999998
          kl: 0.010264455288937905
          policy_loss: -0.04211341780092981
          total_loss: -0.06512165856030253
          vf_explained_var: -0.24384267628192902
          vf_loss: 0.0001777518809022796
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_restore: 89
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,89,1943.98,89000,0,0,0,399.38


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-09_15-24-32
  done: false
  episode_len_mean: 398.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 222
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.132879991001553
          entropy_coeff: 0.009999999999999998
          kl: 0.013087787573906147
          policy_loss: -0.06141128871175978
          total_loss: -0.0825905793863866
          vf_explained_var: -0.7009994983673096
          vf_loss: 0.00010860855505193791
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 90
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,90,1963.05,90000,0,0,0,398.86


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-09_15-24-49
  done: false
  episode_len_mean: 397.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 224
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.2208752420213487
          entropy_coeff: 0.009999999999999998
          kl: 0.009187790080201274
          policy_loss: -0.06493970654490921
          total_loss: -0.08706755585347613
          vf_explained_var: -0.16950510442256927
          vf_loss: 5.2189372945576905e-05
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_restore: 91
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,91,1980.09,91000,0,0,0,397.96


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-09_15-25-14
  done: false
  episode_len_mean: 393.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 228
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.0539027015368143
          entropy_coeff: 0.009999999999999998
          kl: 0.012483861289335095
          policy_loss: -0.07421425059437751
          total_loss: -0.09460934851732519
          vf_explained_var: -0.9015882015228271
          vf_loss: 0.00010491509771478983
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_since_restore: 92
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,92,2004.63,92000,0,0,0,393.87


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-09_15-25-37
  done: false
  episode_len_mean: 392.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 231
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.036001083585951
          entropy_coeff: 0.009999999999999998
          kl: 0.01327695459893437
          policy_loss: -0.08798767721487416
          total_loss: -0.10813127648499277
          vf_explained_var: -0.7907484173774719
          vf_loss: 0.00017491960938463712
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_restore: 93
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,93,2027.55,93000,0,0,0,392.34


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-09_15-26-01
  done: false
  episode_len_mean: 390.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 234
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.018157302008735
          entropy_coeff: 0.009999999999999998
          kl: 0.012414521139848908
          policy_loss: -0.1238494448363781
          total_loss: -0.14390124074286884
          vf_explained_var: -0.9863114356994629
          vf_loss: 9.098194221021712e-05
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 94
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,94,2051.89,94000,0,0,0,390.36


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-09_15-26-22
  done: false
  episode_len_mean: 389.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 237
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.9552664372656081
          entropy_coeff: 0.009999999999999998
          kl: 0.012794139624027916
          policy_loss: -0.0011359384076462852
          total_loss: -0.020543079761167368
          vf_explained_var: -0.7407287359237671
          vf_loss: 0.00010554151663705448
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_restore: 95
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,95,2073.42,95000,0,0,0,389


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-09_15-26-44
  done: false
  episode_len_mean: 388.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 239
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7679659022225274
          entropy_coeff: 0.009999999999999998
          kl: 0.008365173872079475
          policy_loss: -0.08689721963471836
          total_loss: -0.10439469309316741
          vf_explained_var: -0.6693298816680908
          vf_loss: 0.0001560434637617113
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore: 96
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,96,2094.47,96000,0,0,0,388.82




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-09_15-27-21
  done: false
  episode_len_mean: 387.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 242
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.6911805550257364
          entropy_coeff: 0.009999999999999998
          kl: 0.010995666854934426
          policy_loss: -0.05204412074138721
          total_loss: -0.06883679913977782
          vf_explained_var: -0.8292525410652161
          vf_loss: 8.476491863499784e-05
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_restore: 97
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,97,2131.98,97000,0,0,0,387.49


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-09_15-27-43
  done: false
  episode_len_mean: 386.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 245
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8293482489056057
          entropy_coeff: 0.009999999999999998
          kl: 0.019676955611824712
          policy_loss: 0.006517901395757993
          total_loss: -0.011538899482952223
          vf_explained_var: -0.19867034256458282
          vf_loss: 0.0001751914404546066
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_restore: 98
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,98,2153.53,98000,0,0,0,386.76


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-09_15-28-04
  done: false
  episode_len_mean: 386.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 248
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8327135192023383
          entropy_coeff: 0.009999999999999998
          kl: 0.012965373910576124
          policy_loss: -0.03325154644747575
          total_loss: -0.0514050196028418
          vf_explained_var: -0.7593660354614258
          vf_loss: 0.00013314343377714976
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_restore: 99
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,99,2175.24,99000,0,0,0,386.07


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-09_15-28-27
  done: false
  episode_len_mean: 386.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 250
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.5878170715437996
          entropy_coeff: 0.009999999999999998
          kl: 0.008914101773062594
          policy_loss: -0.002964423348506292
          total_loss: -0.018678380880090925
          vf_explained_var: -0.8305172324180603
          vf_loss: 0.00013635670184157788
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,100,2198.19,100000,0,0,0,386.8


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-09_15-28-50
  done: false
  episode_len_mean: 384.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 254
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.6444857319196065
          entropy_coeff: 0.009999999999999998
          kl: 0.012037227804348167
          policy_loss: 0.003391495512591468
          total_loss: -0.012842449214723375
          vf_explained_var: -0.11352115124464035
          vf_loss: 0.0001732926424463383
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,101,2221.31,101000,0,0,0,384.48


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-09_15-29-14
  done: false
  episode_len_mean: 381.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 257
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7386402593718635
          entropy_coeff: 0.009999999999999998
          kl: 0.009906910731072966
          policy_loss: -0.07387184573130476
          total_loss: -0.09116117652091715
          vf_explained_var: -0.37203165888786316
          vf_loss: 6.611256057011713e-05
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,102,2245.01,102000,0,0,0,381.96


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-09_15-29-37
  done: false
  episode_len_mean: 380.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 259
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8421046296755472
          entropy_coeff: 0.009999999999999998
          kl: 0.008907931771490555
          policy_loss: -0.046502353913254205
          total_loss: -0.06464895498421457
          vf_explained_var: 0.12953370809555054
          vf_loss: 0.0002466090466542583
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,103,2267.51,103000,0,0,0,380.64


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-09_15-29-58
  done: false
  episode_len_mean: 379.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 262
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.767840900686052
          entropy_coeff: 0.009999999999999998
          kl: 0.009497732423488486
          policy_loss: -0.01907120785779423
          total_loss: -0.03662237206266986
          vf_explained_var: -0.781218945980072
          vf_loss: 9.7562176759109e-05
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,104,2289.13,104000,0,0,0,379.79


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-09_15-30-19
  done: false
  episode_len_mean: 378.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 265
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7825364258554246
          entropy_coeff: 0.009999999999999998
          kl: 0.012381315634496678
          policy_loss: -0.030191282348500357
          total_loss: -0.04785696988304456
          vf_explained_var: -0.7201476097106934
          vf_loss: 0.00012098655048854804
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,105,2310.19,105000,0,0,0,378.76


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-09_15-30-41
  done: false
  episode_len_mean: 376.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 268
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.6513494624031915
          entropy_coeff: 0.009999999999999998
          kl: 0.009680362485223071
          policy_loss: -0.1530752672917313
          total_loss: -0.16948179747495387
          vf_explained_var: -0.5819435119628906
          vf_loss: 7.671130948033856e-05
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,106,2331.93,106000,0,0,0,376.35




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-09_15-31-18
  done: false
  episode_len_mean: 375.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 270
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.850080414613088
          entropy_coeff: 0.009999999999999998
          kl: 0.01373861713234332
          policy_loss: -0.014936741731233067
          total_loss: -0.0332754915787114
          vf_explained_var: -0.7555676102638245
          vf_loss: 0.00011912077582868127
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,107,2369.07,107000,0,0,0,375.56


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-09_15-31-41
  done: false
  episode_len_mean: 374.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 273
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8536574376953974
          entropy_coeff: 0.009999999999999998
          kl: 0.017217361761223966
          policy_loss: -0.023000009854634604
          total_loss: -0.04140841778781679
          vf_explained_var: -0.39312833547592163
          vf_loss: 7.436117719205666e-05
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,108,2392.21,108000,0,0,0,374.15


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-09_15-32-02
  done: false
  episode_len_mean: 372.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 276
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7910836338996887
          entropy_coeff: 0.009999999999999998
          kl: 0.01889405420721554
          policy_loss: -0.09380883549650511
          total_loss: -0.11142532399131191
          vf_explained_var: 0.13104136288166046
          vf_loss: 0.0002353029630386219
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,109,2413.14,109000,0,0,0,372.31


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-09_15-32-21
  done: false
  episode_len_mean: 371.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 278
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.783830592367384
          entropy_coeff: 0.009999999999999998
          kl: 0.013123122481440373
          policy_loss: 0.04222708625925912
          total_loss: 0.02459688815805647
          vf_explained_var: 0.4861195981502533
          vf_loss: 0.00016709815447231652
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,110,2431.49,110000,0,0,0,371.93


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-09_15-32-41
  done: false
  episode_len_mean: 371.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 280
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.577880981233385
          entropy_coeff: 0.009999999999999998
          kl: 0.01044267798827369
          policy_loss: -0.022050994137922924
          total_loss: -0.03763782911830478
          vf_explained_var: 0.3900274932384491
          vf_loss: 0.0001593417252782577
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,111,2451.98,111000,0,0,0,371.46


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-09_15-33-03
  done: false
  episode_len_mean: 370.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 283
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7972912020153469
          entropy_coeff: 0.009999999999999998
          kl: 0.011088996919618961
          policy_loss: -0.06277121160593298
          total_loss: -0.08060483659307162
          vf_explained_var: -0.32611486315727234
          vf_loss: 0.00010463290479189406
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,112,2473.29,112000,0,0,0,370.4


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-09_15-33-25
  done: false
  episode_len_mean: 367.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 286
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.812442782190111
          entropy_coeff: 0.009999999999999998
          kl: 0.012047056402498965
          policy_loss: -0.05149554614391592
          total_loss: -0.06939894441101287
          vf_explained_var: -0.8611239194869995
          vf_loss: 0.00018338238334965557
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,113,2495.39,113000,0,0,0,367.74


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-09_15-33-48
  done: false
  episode_len_mean: 366.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 289
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8001066274113124
          entropy_coeff: 0.009999999999999998
          kl: 0.01568635908124784
          policy_loss: -0.023022836446762084
          total_loss: -0.04089832752943039
          vf_explained_var: -0.35505276918411255
          vf_loss: 7.65580587742281e-05
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,114,2519.05,114000,0,0,0,366.34


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-09_15-34-10
  done: false
  episode_len_mean: 366.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 291
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8290798664093018
          entropy_coeff: 0.009999999999999998
          kl: 0.012283302001311548
          policy_loss: -0.027702542518575988
          total_loss: -0.045839288954933485
          vf_explained_var: -0.2868104875087738
          vf_loss: 0.00011566814504880717
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,115,2540.85,115000,0,0,0,366.31


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-09_15-34-32
  done: false
  episode_len_mean: 364.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 294
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7376277168591818
          entropy_coeff: 0.009999999999999998
          kl: 0.013446460725021175
          policy_loss: -0.022694475907418462
          total_loss: -0.03989302048252689
          vf_explained_var: -0.2901662290096283
          vf_loss: 0.00013571128009870235
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,116,2562.37,116000,0,0,0,364.29


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-09_15-34-53
  done: false
  episode_len_mean: 364.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 296
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7116985771391127
          entropy_coeff: 0.009999999999999998
          kl: 0.010736354608310981
          policy_loss: -0.033125686479939354
          total_loss: -0.050035156806310016
          vf_explained_var: -0.11760305613279343
          vf_loss: 0.0001739647797270057
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,117,2583.83,117000,0,0,0,364.92


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-09_15-35-16
  done: false
  episode_len_mean: 363.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 299
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.9859497692849901
          entropy_coeff: 0.009999999999999998
          kl: 0.011517933201717253
          policy_loss: 0.021807295415136548
          total_loss: 0.0021080551462041006
          vf_explained_var: -0.366049587726593
          vf_loss: 0.00012426550270498005
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,118,2606.34,118000,0,0,0,363.62




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-09_15-35-53
  done: false
  episode_len_mean: 363.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 302
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.0555977132585315
          entropy_coeff: 0.009999999999999998
          kl: 0.014334023512064399
          policy_loss: -0.05325395663579305
          total_loss: -0.07370016343063779
          vf_explained_var: -0.00569114601239562
          vf_loss: 6.49747134755469e-05
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,119,2643.93,119000,0,0,0,363.42


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-09_15-36-14
  done: false
  episode_len_mean: 364.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 305
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8326853275299073
          entropy_coeff: 0.009999999999999998
          kl: 0.011523359025088503
          policy_loss: -0.05013928305771616
          total_loss: -0.06831182833347056
          vf_explained_var: 0.6495673060417175
          vf_loss: 0.0001182972627753366
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,120,2664.1,120000,0,0,0,364.59


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-09_15-36-32
  done: false
  episode_len_mean: 365.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 307
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8204902092615762
          entropy_coeff: 0.009999999999999998
          kl: 0.011032090435762025
          policy_loss: -0.06634370336929957
          total_loss: -0.08433025843567318
          vf_explained_var: -0.2554301917552948
          vf_loss: 0.00018387056164404688
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,121,2682.87,121000,0,0,0,365


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-09_15-36-52
  done: false
  episode_len_mean: 367.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 309
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.865172513326009
          entropy_coeff: 0.009999999999999998
          kl: 0.014239083378372493
          policy_loss: -0.04283749444617165
          total_loss: -0.061368013421694435
          vf_explained_var: -0.3394540846347809
          vf_loss: 7.670775790352814e-05
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,122,2702.9,122000,0,0,0,367.4


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-09_15-37-13
  done: false
  episode_len_mean: 367.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 312
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8787393556700813
          entropy_coeff: 0.009999999999999998
          kl: 0.017748501943292656
          policy_loss: -0.0848937615338299
          total_loss: -0.10352958809170458
          vf_explained_var: 0.26031503081321716
          vf_loss: 9.610301264425894e-05
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,123,2722.98,123000,0,0,0,367.8


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-09_15-37-31
  done: false
  episode_len_mean: 369.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 314
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8407539168993632
          entropy_coeff: 0.009999999999999998
          kl: 0.009673379771754673
          policy_loss: -0.008780675753951073
          total_loss: -0.027023128461506632
          vf_explained_var: -0.9223127365112305
          vf_loss: 0.00013485642634299843
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,124,2741.85,124000,0,0,0,369.84


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-09_15-37-52
  done: false
  episode_len_mean: 370.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 317
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8460662802060446
          entropy_coeff: 0.009999999999999998
          kl: 0.01257858032751339
          policy_loss: -0.05717751387920644
          total_loss: -0.07553661006192366
          vf_explained_var: -0.5585795640945435
          vf_loss: 6.225654619306119e-05
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,125,2762.02,125000,0,0,0,370.55


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-09_15-38-12
  done: false
  episode_len_mean: 370.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 319
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7865031878153483
          entropy_coeff: 0.009999999999999998
          kl: 0.007803438262245364
          policy_loss: -0.0356166075501177
          total_loss: -0.05338488680620988
          vf_explained_var: -0.6565423607826233
          vf_loss: 7.23670399464835e-05
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,126,2782.67,126000,0,0,0,370.12


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-09_15-38-30
  done: false
  episode_len_mean: 372.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 322
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7721057150099013
          entropy_coeff: 0.009999999999999998
          kl: 0.012674869804865372
          policy_loss: -0.024156102538108827
          total_loss: -0.04176635301361482
          vf_explained_var: -0.7801542282104492
          vf_loss: 7.119434940250358e-05
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,127,2800.38,127000,0,0,0,372.32


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-09_15-38-48
  done: false
  episode_len_mean: 373.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 324
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.706884549723731
          entropy_coeff: 0.009999999999999998
          kl: 0.01377384018160917
          policy_loss: -0.018559640376932092
          total_loss: -0.03548823700596889
          vf_explained_var: -0.40142303705215454
          vf_loss: 9.720278810871403e-05
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,128,2818.38,128000,0,0,0,373.25


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-09_15-39-06
  done: false
  episode_len_mean: 375.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 326
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.701506245136261
          entropy_coeff: 0.009999999999999998
          kl: 0.006229889589380989
          policy_loss: -0.12651148330834178
          total_loss: -0.14347387908233536
          vf_explained_var: -0.7449097633361816
          vf_loss: 3.319905988771805e-05
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,129,2836.27,129000,0,0,0,375.24


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-09_15-39-23
  done: false
  episode_len_mean: 379.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 328
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.758418122927348
          entropy_coeff: 0.009999999999999998
          kl: 0.01919576378608914
          policy_loss: -0.037031456993685825
          total_loss: -0.05436470707257589
          vf_explained_var: -0.19999630749225616
          vf_loss: 0.00019094325669155094
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,130,2852.9,130000,0,0,0,379.36




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-09_15-39-57
  done: false
  episode_len_mean: 382.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 330
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.6920328974723815
          entropy_coeff: 0.009999999999999998
          kl: 0.009369207974555508
          policy_loss: -0.04999643042683601
          total_loss: -0.06686906011568175
          vf_explained_var: -1.0
          vf_loss: 1.8420632871614848e-05
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,131,2887.3,131000,0,0,0,382.07


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-09_15-40-15
  done: false
  episode_len_mean: 383.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 332
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8284900890456306
          entropy_coeff: 0.009999999999999998
          kl: 0.013465660202957015
          policy_loss: -0.09154598149988387
          total_loss: -0.10974542639321751
          vf_explained_var: -0.34725236892700195
          vf_loss: 4.337211398200856e-05
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,132,2905.55,132000,0,0,0,383.63


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-09_15-40-34
  done: false
  episode_len_mean: 387.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 335
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8153907537460328
          entropy_coeff: 0.009999999999999998
          kl: 0.01255238447409472
          policy_loss: -0.08129978084729778
          total_loss: -0.09931124887532658
          vf_explained_var: -0.41688263416290283
          vf_loss: 0.00010321313770368255
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,133,2924.17,133000,0,0,0,387.91


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-09_15-40-51
  done: false
  episode_len_mean: 389.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 337
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.6173755168914794
          entropy_coeff: 0.009999999999999998
          kl: 0.007534533749591274
          policy_loss: -0.01598330649236838
          total_loss: -0.03211005094150702
          vf_explained_var: -0.7716098427772522
          vf_loss: 2.3465727594561437e-05
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,134,2941.09,134000,0,0,0,389.77


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-09_15-41-09
  done: false
  episode_len_mean: 391.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 339
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.5414422154426575
          entropy_coeff: 0.009999999999999998
          kl: 0.008661707204541595
          policy_loss: 0.031900685611698364
          total_loss: 0.01654376449684302
          vf_explained_var: -0.2890508472919464
          vf_loss: 3.0432890768376333e-05
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,135,2959.1,135000,0,0,0,391.18


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-09_15-41-25
  done: false
  episode_len_mean: 394.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 341
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.5865398526191712
          entropy_coeff: 0.009999999999999998
          kl: 0.012245398290807922
          policy_loss: -0.020347752918799717
          total_loss: -0.03612953548630079
          vf_explained_var: -0.9949418902397156
          vf_loss: 4.534708417243868e-05
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,136,2974.72,136000,0,0,0,394.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-09_15-41-40
  done: false
  episode_len_mean: 397.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 343
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.4914623022079467
          entropy_coeff: 0.009999999999999998
          kl: 0.011774320318205255
          policy_loss: -0.05299000889062881
          total_loss: -0.06784188449382782
          vf_explained_var: -0.595230758190155
          vf_loss: 2.5955356381422865e-05
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,137,2989.88,137000,0,0,0,397.5


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-09_15-41-58
  done: false
  episode_len_mean: 398.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 345
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.6333648774358962
          entropy_coeff: 0.009999999999999998
          kl: 0.011320103149151153
          policy_loss: -0.038493360641101995
          total_loss: -0.05469473865297106
          vf_explained_var: -0.9975253343582153
          vf_loss: 9.689767979984431e-05
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,138,3007.89,138000,0,0,0,398.57


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-09_15-42-13
  done: false
  episode_len_mean: 402.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 347
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.681359346707662
          entropy_coeff: 0.009999999999999998
          kl: 0.009965763381794012
          policy_loss: -0.017087004085381826
          total_loss: -0.03370050274663501
          vf_explained_var: -0.4577208161354065
          vf_loss: 0.0001689473056709782
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,139,3022.79,139000,0,0,0,402.76


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-09_15-42-30
  done: false
  episode_len_mean: 405.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 349
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7921497344970703
          entropy_coeff: 0.009999999999999998
          kl: 0.011677368271096306
          policy_loss: -0.05902641365925471
          total_loss: -0.07681522427333726
          vf_explained_var: -0.6875420212745667
          vf_loss: 9.619647320909684e-05
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,140,3039.9,140000,0,0,0,405.47


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-09_15-42-49
  done: false
  episode_len_mean: 407.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 351
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8604309135013157
          entropy_coeff: 0.009999999999999998
          kl: 0.013970266695958605
          policy_loss: -0.0037713455657164257
          total_loss: -0.022255683773093754
          vf_explained_var: -0.37314996123313904
          vf_loss: 7.631733733433066e-05
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,141,3058.87,141000,0,0,0,407.29


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-09_15-43-06
  done: false
  episode_len_mean: 410.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 353
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8361974080403647
          entropy_coeff: 0.009999999999999998
          kl: 0.017136713543779554
          policy_loss: 0.0013350907299253675
          total_loss: -0.01692278716299269
          vf_explained_var: -0.4603888690471649
          vf_loss: 5.054493354287438e-05
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,142,3076.33,142000,0,0,0,410.2


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-09_15-43-27
  done: false
  episode_len_mean: 412.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 356
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.6499021490414938
          entropy_coeff: 0.009999999999999998
          kl: 0.013122924522419687
          policy_loss: -0.02071201710237397
          total_loss: -0.03714002850982878
          vf_explained_var: -0.9070490598678589
          vf_loss: 3.0000694136510397e-05
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,143,3096.96,143000,0,0,0,412.21


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-09_15-43-43
  done: false
  episode_len_mean: 415.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 358
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8649657514360216
          entropy_coeff: 0.009999999999999998
          kl: 0.009826726906860926
          policy_loss: 0.008032245292431779
          total_loss: -0.010546838968164392
          vf_explained_var: -0.6501753926277161
          vf_loss: 3.986589949312878e-05
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 144
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,144,3113.24,144000,0,0,0,415.32




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-09_15-44-19
  done: false
  episode_len_mean: 416.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 360
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7603941096199884
          entropy_coeff: 0.009999999999999998
          kl: 0.007807088123719956
          policy_loss: -0.019516170935498345
          total_loss: -0.03701341272228294
          vf_explained_var: -0.7546815872192383
          vf_loss: 8.230394665184172e-05
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_since_restore: 145
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,145,3148.98,145000,0,0,0,416.48


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-09_15-44-40
  done: false
  episode_len_mean: 417.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 363
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7661957555347019
          entropy_coeff: 0.009999999999999998
          kl: 0.015255312019640973
          policy_loss: -0.07881278209388257
          total_loss: -0.096345206308696
          vf_explained_var: -0.862668514251709
          vf_loss: 8.186075691709346e-05
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_since_restore: 146
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,146,3169.85,146000,0,0,0,417.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-09_15-45-01
  done: false
  episode_len_mean: 416.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 365
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8395878394444785
          entropy_coeff: 0.009999999999999998
          kl: 0.012834783448459557
          policy_loss: 0.04706991596354378
          total_loss: 0.02874672462542852
          vf_explained_var: -0.22183769941329956
          vf_loss: 3.257529336527417e-05
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_since_restore: 147
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,147,3190.59,147000,0,0,0,416.93


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-09_15-45-19
  done: false
  episode_len_mean: 419.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 368
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7943525049421523
          entropy_coeff: 0.009999999999999998
          kl: 0.011324297128755647
          policy_loss: -0.11432907755176226
          total_loss: -0.13219742162360085
          vf_explained_var: -0.5835985541343689
          vf_loss: 3.9791867614743144e-05
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 148
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,148,3209,148000,0,0,0,419.32


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-09_15-45-37
  done: false
  episode_len_mean: 420.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 370
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.901957803302341
          entropy_coeff: 0.009999999999999998
          kl: 0.013539563082045486
          policy_loss: -0.025310181495216157
          total_loss: -0.0442489404645231
          vf_explained_var: -0.6028187274932861
          vf_loss: 3.850575184413982e-05
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_since_restore: 149
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,149,3227.01,149000,0,0,0,420.77


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-09_15-45-57
  done: false
  episode_len_mean: 422.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 372
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.9494694550832112
          entropy_coeff: 0.009999999999999998
          kl: 0.010802580123622403
          policy_loss: -0.02103431856052743
          total_loss: -0.04042585372096962
          vf_explained_var: -0.3876015841960907
          vf_loss: 6.940374363087662e-05
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 150
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,150,3246.9,150000,0,0,0,422.21


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-09_15-46-15
  done: false
  episode_len_mean: 422.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 374
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8981078770425586
          entropy_coeff: 0.009999999999999998
          kl: 0.013768645203554798
          policy_loss: -0.03779507097270754
          total_loss: -0.05668814861112171
          vf_explained_var: -0.8739738464355469
          vf_loss: 4.497481147862143e-05
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_since_restore: 151
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,151,3264.99,151000,0,0,0,422.49


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-09_15-46-36
  done: false
  episode_len_mean: 425.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 377
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.8362424241171942
          entropy_coeff: 0.009999999999999998
          kl: 0.014466262844621427
          policy_loss: 0.05306520644161436
          total_loss: 0.03481763758593136
          vf_explained_var: -0.40361425280570984
          vf_loss: 6.964868840037121e-05
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 152
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,152,3285.42,152000,0,0,0,425.23


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-09_15-46-55
  done: false
  episode_len_mean: 425.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 379
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.7096639805369906
          entropy_coeff: 0.009999999999999998
          kl: 0.023649211565082626
          policy_loss: -0.049505221678151023
          total_loss: -0.06622781107823054
          vf_explained_var: 0.44114363193511963
          vf_loss: 0.0003001481033505924
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_since_restore: 153
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,153,3304.84,153000,0,0,0,425.46


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-09_15-47-15
  done: false
  episode_len_mean: 425.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 381
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0046875
          cur_lr: 5.000000000000001e-05
          entropy: 1.739018232292599
          entropy_coeff: 0.009999999999999998
          kl: 0.013229834675820484
          policy_loss: -0.017593701556324958
          total_loss: -0.03473846498462889
          vf_explained_var: 0.4414299428462982
          vf_loss: 0.0001834028633311391
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_since_restore: 154
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,154,3324.86,154000,0,0,0,425.88


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-09_15-47-36
  done: false
  episode_len_mean: 427.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 384
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0046875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8286021153132122
          entropy_coeff: 0.009999999999999998
          kl: 0.02177658351164015
          policy_loss: -0.10996218191252814
          total_loss: -0.12794882667561372
          vf_explained_var: -0.6375928521156311
          vf_loss: 0.0001972984261657176
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_since_restore: 155
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,155,3345.72,155000,0,0,0,427.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-09_15-47-56
  done: false
  episode_len_mean: 427.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 386
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.74776664574941
          entropy_coeff: 0.009999999999999998
          kl: 0.014180529856419098
          policy_loss: -0.016376285751660665
          total_loss: -0.03356868492232429
          vf_explained_var: 0.013668937608599663
          vf_loss: 0.0001855577149803543
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_since_restore: 156
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,156,3365.7,156000,0,0,0,427.99


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-09_15-48-14
  done: false
  episode_len_mean: 430.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 388
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.693778465853797
          entropy_coeff: 0.009999999999999998
          kl: 0.016152374500295667
          policy_loss: 0.08793003658453623
          total_loss: 0.0712440470026599
          vf_explained_var: -0.19891567528247833
          vf_loss: 0.00013822480442387232
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations_since_restore: 157
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,157,3383.6,157000,0,0,0,430.81




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-09_15-48-51
  done: false
  episode_len_mean: 431.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 391
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.625627417034573
          entropy_coeff: 0.009999999999999998
          kl: 0.012867015969018682
          policy_loss: -0.05234737214114931
          total_loss: -0.06831738568014568
          vf_explained_var: -0.07953639328479767
          vf_loss: 0.00019579178745819567
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations_since_restore: 158
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,158,3421.15,158000,0,0,0,431.99


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-09_15-49-07
  done: false
  episode_len_mean: 434.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 392
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.6213497069146898
          entropy_coeff: 0.009999999999999998
          kl: 0.015573782303806204
          policy_loss: -0.02213223667608367
          total_loss: -0.038134766577018635
          vf_explained_var: 0.4515998959541321
          vf_loss: 0.00010146290078409948
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_since_restore: 159
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,159,3436.68,159000,0,0,0,434.32


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-09_15-49-23
  done: false
  episode_len_mean: 436.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 394
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.725774387518565
          entropy_coeff: 0.009999999999999998
          kl: 0.009961585871992766
          policy_loss: -0.0059776428673002455
          total_loss: -0.023052721553378636
          vf_explained_var: -0.20437787473201752
          vf_loss: 0.0001126219497463252
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 160
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,160,3453.02,160000,0,0,0,436.71


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-09_15-49-41
  done: false
  episode_len_mean: 439.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 396
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.751977511246999
          entropy_coeff: 0.009999999999999998
          kl: 0.010332417021993173
          policy_loss: -0.019874205854203967
          total_loss: -0.037221237934297983
          vf_explained_var: -0.2323797047138214
          vf_loss: 0.00010009466511999361
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_since_restore: 161
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,161,3470.8,161000,0,0,0,439.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-09_15-50-02
  done: false
  episode_len_mean: 439.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 399
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.5572167409790887
          entropy_coeff: 0.009999999999999998
          kl: 0.009943035979777647
          policy_loss: -0.097049946586291
          total_loss: -0.1124244189924664
          vf_explained_var: 0.08306393027305603
          vf_loss: 0.0001277814698470239
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_since_restore: 162
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,162,3491.27,162000,0,0,0,439.77


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-09_15-50-18
  done: false
  episode_len_mean: 442.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 401
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.6814451376597086
          entropy_coeff: 0.009999999999999998
          kl: 0.01282932728663811
          policy_loss: -0.018714004506667454
          total_loss: -0.03534285310241911
          vf_explained_var: -0.06814315915107727
          vf_loss: 9.539447015009097e-05
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_since_restore: 163
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,163,3507.86,163000,0,0,0,442.86


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-09_15-50-39
  done: false
  episode_len_mean: 443.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 404
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.825882093111674
          entropy_coeff: 0.009999999999999998
          kl: 0.011143645650693618
          policy_loss: -0.11490615208943684
          total_loss: -0.1330046541781889
          vf_explained_var: -0.5930744409561157
          vf_loss: 8.19611264079059e-05
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_since_restore: 164
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,164,3528.99,164000,0,0,0,443.39


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-09_15-50-57
  done: false
  episode_len_mean: 444.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 406
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.7923869927724203
          entropy_coeff: 0.009999999999999998
          kl: 0.009966689498827976
          policy_loss: -0.12689614875449073
          total_loss: -0.1446469325158331
          vf_explained_var: -0.2971605658531189
          vf_loss: 0.00010301042780661697
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterations_since_restore: 165
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,165,3546.41,165000,0,0,0,444.93


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-09_15-51-17
  done: false
  episode_len_mean: 444.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 408
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.5385098391109042
          entropy_coeff: 0.009999999999999998
          kl: 0.008619839009273277
          policy_loss: -0.0011613750623332129
          total_loss: -0.016426207704676524
          vf_explained_var: -0.7853887677192688
          vf_loss: 5.9658109421434347e-05
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 166
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,166,3566.44,166000,0,0,0,444.42


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-09_15-51-34
  done: false
  episode_len_mean: 445.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 410
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.535850497086843
          entropy_coeff: 0.009999999999999998
          kl: 0.006012002329302943
          policy_loss: 0.04789287398258845
          total_loss: 0.032641065493226054
          vf_explained_var: 0.13416266441345215
          vf_loss: 6.44253898321444e-05
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations_since_restore: 167
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,167,3583.52,167000,0,0,0,445.66


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-09_15-51-50
  done: false
  episode_len_mean: 448.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 412
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.7391629020373027
          entropy_coeff: 0.009999999999999998
          kl: 0.010421116207564888
          policy_loss: 0.0345521023703946
          total_loss: 0.017314767589171727
          vf_explained_var: -0.6565656065940857
          vf_loss: 8.101793876752102e-05
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 168
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,168,3599.54,168000,0,0,0,448.89


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-09_15-52-06
  done: false
  episode_len_mean: 450.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 414
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8319886631435818
          entropy_coeff: 0.009999999999999998
          kl: 0.01183500885165382
          policy_loss: -0.0225369052340587
          total_loss: -0.04068126243849595
          vf_explained_var: -0.7707067728042603
          vf_loss: 9.231282229949203e-05
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations_since_restore: 169
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,169,3615.58,169000,0,0,0,450.73


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-09_15-52-21
  done: false
  episode_len_mean: 454.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 416
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8517103287908765
          entropy_coeff: 0.009999999999999998
          kl: 0.011208531386588581
          policy_loss: -0.054147018192129005
          total_loss: -0.07246706676152018
          vf_explained_var: -0.31547775864601135
          vf_loss: 0.00011824649148669171
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 170
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,170,3631,170000,0,0,0,454.52


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-09_15-52-40
  done: false
  episode_len_mean: 456.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 418
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.788584005832672
          entropy_coeff: 0.009999999999999998
          kl: 0.015299777691546446
          policy_loss: 0.07511610312180386
          total_loss: 0.057415446804629434
          vf_explained_var: -0.37481170892715454
          vf_loss: 7.76073724814018e-05
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_since_restore: 171
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,171,3649.93,171000,0,0,0,456.55




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-09_15-53-16
  done: false
  episode_len_mean: 457.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 420
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.7548754453659057
          entropy_coeff: 0.009999999999999998
          kl: 0.009984861282650767
          policy_loss: -0.05710701665116681
          total_loss: -0.07444356671637958
          vf_explained_var: -0.34712979197502136
          vf_loss: 0.00014199813295918932
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 172
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,172,3685.12,172000,0,0,0,457.02


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-09_15-53-36
  done: false
  episode_len_mean: 456.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 422
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.847973652680715
          entropy_coeff: 0.009999999999999998
          kl: 0.01467317256248137
          policy_loss: -0.08390911892056466
          total_loss: -0.10222439956333902
          vf_explained_var: -0.5263182520866394
          vf_loss: 6.128392947680873e-05
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterations_since_restore: 173
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,173,3705.33,173000,0,0,0,456.97


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-09_15-53-52
  done: false
  episode_len_mean: 457.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 424
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8092558966742622
          entropy_coeff: 0.009999999999999998
          kl: 0.013311439725762418
          policy_loss: -0.036515207340319954
          total_loss: -0.05439736379517449
          vf_explained_var: -0.6523586511611938
          vf_loss: 0.00011680338448059047
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_since_restore: 174
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,174,3721.79,174000,0,0,0,457.71


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-09_15-54-10
  done: false
  episode_len_mean: 457.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 426
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9243861979908414
          entropy_coeff: 0.009999999999999998
          kl: 0.011750534626063174
          policy_loss: -0.0996553124446008
          total_loss: -0.11876446501248412
          vf_explained_var: -0.6604120135307312
          vf_loss: 5.208969333681226e-05
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterations_since_restore: 175
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,175,3739.22,175000,0,0,0,457.89


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-09_15-54-30
  done: false
  episode_len_mean: 455.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 429
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.722788589530521
          entropy_coeff: 0.009999999999999998
          kl: 0.01223290547581573
          policy_loss: -0.08586668074131013
          total_loss: -0.1029220373266273
          vf_explained_var: -0.42344191670417786
          vf_loss: 8.651717164159183e-05
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 176
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,176,3759.26,176000,0,0,0,455.78


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-09_15-54-48
  done: false
  episode_len_mean: 456.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 431
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.7943379534615411
          entropy_coeff: 0.009999999999999998
          kl: 0.012706840385051689
          policy_loss: -0.05834294077422884
          total_loss: -0.07604973643190331
          vf_explained_var: -0.25377359986305237
          vf_loss: 0.0001472363881475758
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iterations_since_restore: 177
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,177,3777.14,177000,0,0,0,456.69


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-09_15-55-05
  done: false
  episode_len_mean: 457.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 433
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8575461811489529
          entropy_coeff: 0.009999999999999998
          kl: 0.012299908275673854
          policy_loss: 0.007101403352701001
          total_loss: -0.011161862417227691
          vf_explained_var: -0.9188305139541626
          vf_loss: 0.000225712879263382
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations_since_restore: 178
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,178,3794.08,178000,0,0,0,457.53


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-09_15-55-22
  done: false
  episode_len_mean: 459.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 435
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.80266066259808
          entropy_coeff: 0.009999999999999998
          kl: 0.018623159569355997
          policy_loss: 0.06449938639998436
          total_loss: 0.04679934423830774
          vf_explained_var: -0.40643310546875
          vf_loss: 0.00019561828473039591
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iterations_since_restore: 179
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,179,3811.41,179000,0,0,0,459.21


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-09_15-55-40
  done: false
  episode_len_mean: 459.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 437
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.880177546872033
          entropy_coeff: 0.009999999999999998
          kl: 0.0137817495562827
          policy_loss: 0.0903697226403488
          total_loss: 0.07175502739846706
          vf_explained_var: -0.3466048836708069
          vf_loss: 9.017756207564768e-05
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 180
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,180,3829.26,180000,0,0,0,459.31


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-09_15-55-59
  done: false
  episode_len_mean: 459.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 439
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.838924511273702
          entropy_coeff: 0.009999999999999998
          kl: 0.015592778969715285
          policy_loss: -0.058856046779288185
          total_loss: -0.07689541975657145
          vf_explained_var: -0.5610204935073853
          vf_loss: 0.00024023449227065106
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 181
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,181,3848,181000,0,0,0,459.69


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-09_15-56-18
  done: false
  episode_len_mean: 457.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 441
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9115799877378676
          entropy_coeff: 0.009999999999999998
          kl: 0.014550910382156938
          policy_loss: -0.00980608906182978
          total_loss: -0.028743295412924554
          vf_explained_var: 0.049948256462812424
          vf_loss: 7.628214698343072e-05
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_since_restore: 182
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,182,3867.46,182000,0,0,0,457.79


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-09_15-56-38
  done: false
  episode_len_mean: 454.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 444
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8073137799898784
          entropy_coeff: 0.009999999999999998
          kl: 0.01565423222849876
          policy_loss: -0.0478058911446068
          total_loss: -0.06571422471768326
          vf_explained_var: -0.7788659930229187
          vf_loss: 5.47347349109057e-05
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iterations_since_restore: 183
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,183,3887.77,183000,0,0,0,454.89


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-09_15-56-56
  done: false
  episode_len_mean: 454.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 446
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9186894999610054
          entropy_coeff: 0.009999999999999998
          kl: 0.012003642594751776
          policy_loss: -0.010013075338469611
          total_loss: -0.029084038568867578
          vf_explained_var: -1.0
          vf_loss: 3.1534892403012943e-05
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 184
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,184,3905.67,184000,0,0,0,454.78


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-09_15-57-14
  done: false
  episode_len_mean: 454.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 448
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9337972124417624
          entropy_coeff: 0.009999999999999998
          kl: 0.012142003035460727
          policy_loss: 0.018203683156106206
          total_loss: -0.0009889257864819632
          vf_explained_var: -0.3166123032569885
          vf_loss: 5.99898013307312e-05
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iterations_since_restore: 185
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,185,3922.91,185000,0,0,0,454.67




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-09_15-57-49
  done: false
  episode_len_mean: 454.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 450
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9650402267773945
          entropy_coeff: 0.009999999999999998
          kl: 0.011807143940073397
          policy_loss: 0.013204002214802637
          total_loss: -0.006303379695034689
          vf_explained_var: -0.9823881387710571
          vf_loss: 6.000098490201506e-05
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_since_restore: 186
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,186,3958.16,186000,0,0,0,454.01


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-09_15-58-10
  done: false
  episode_len_mean: 453.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 453
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8275651031070286
          entropy_coeff: 0.009999999999999998
          kl: 0.013252879178256401
          policy_loss: -0.010444658663537767
          total_loss: -0.028571220487356185
          vf_explained_var: -0.39025846123695374
          vf_loss: 5.590448818111326e-05
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_since_restore: 187
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,187,3978.79,187000,0,0,0,453.46


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-09_15-58-31
  done: false
  episode_len_mean: 453.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 455
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.793597850534651
          entropy_coeff: 0.009999999999999998
          kl: 0.010402537974359008
          policy_loss: -0.01315511961778005
          total_loss: -0.030948703156577217
          vf_explained_var: -0.7997217178344727
          vf_loss: 6.925149666333002e-05
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterations_since_restore: 188
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,188,4000.25,188000,0,0,0,453.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-09_15-58-50
  done: false
  episode_len_mean: 451.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 458
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8975350115034315
          entropy_coeff: 0.009999999999999998
          kl: 0.01028902084395013
          policy_loss: -0.0183300761712922
          total_loss: -0.037192206664217846
          vf_explained_var: -0.7973954081535339
          vf_loss: 4.087438673094665e-05
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterations_since_restore: 189
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,189,4019.54,189000,0,0,0,451.18


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-09_15-59-11
  done: false
  episode_len_mean: 450.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 460
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8148484839333427
          entropy_coeff: 0.009999999999999998
          kl: 0.010514813804473189
          policy_loss: -0.05359478315545453
          total_loss: -0.0715994567092922
          vf_explained_var: -1.0
          vf_loss: 6.987672786635812e-05
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 190
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,190,4039.88,190000,0,0,0,450.88


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-09_15-59-31
  done: false
  episode_len_mean: 450.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 463
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9763655185699462
          entropy_coeff: 0.009999999999999998
          kl: 0.012080154158094509
          policy_loss: 0.013567764477597343
          total_loss: -0.0060616585115591684
          vf_explained_var: -0.9344716668128967
          vf_loss: 4.9290281559579956e-05
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  iterations_since_restore: 191
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,191,4060.56,191000,0,0,0,450.4


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-09_15-59-52
  done: false
  episode_len_mean: 451.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 465
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9186081714100307
          entropy_coeff: 0.009999999999999998
          kl: 0.01375816345514459
          policy_loss: -0.05015413934985797
          total_loss: -0.0692057217988703
          vf_explained_var: -0.5928470492362976
          vf_loss: 3.776312116517349e-05
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 192
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,192,4080.91,192000,0,0,0,451.25


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-09_16-00-11
  done: false
  episode_len_mean: 450.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 468
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9620353976885478
          entropy_coeff: 0.009999999999999998
          kl: 0.01192755827399919
          policy_loss: -0.049437999228636426
          total_loss: -0.06893666850195991
          vf_explained_var: -0.3386099636554718
          vf_loss: 3.7819638580711196e-05
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  iterations_since_restore: 193
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,193,4099.98,193000,0,0,0,450.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-09_16-00-30
  done: false
  episode_len_mean: 450.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 470
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8591241425938076
          entropy_coeff: 0.009999999999999998
          kl: 0.012349788850769549
          policy_loss: -0.015357382016049491
          total_loss: -0.033782269722885554
          vf_explained_var: -0.9895790815353394
          vf_loss: 7.951850630989712e-05
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterations_since_restore: 194
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,194,4119.01,194000,0,0,0,450.49


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-09_16-00-50
  done: false
  episode_len_mean: 449.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 473
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9668074131011963
          entropy_coeff: 0.009999999999999998
          kl: 0.009599139227996902
          policy_loss: -0.029499180366595587
          total_loss: -0.0490721279134353
          vf_explained_var: -0.3950995206832886
          vf_loss: 2.76343274890678e-05
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  iterations_since_restore: 195
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,195,4139.27,195000,0,0,0,449.08


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-09_16-01-09
  done: false
  episode_len_mean: 448.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 475
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.6505668110317655
          entropy_coeff: 0.009999999999999998
          kl: 0.007326594807790106
          policy_loss: -0.05473397992965248
          total_loss: -0.07115211563391818
          vf_explained_var: -0.984559953212738
          vf_loss: 3.601860656393304e-05
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 196
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,196,4158.51,196000,0,0,0,448.37


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-09_16-01-30
  done: false
  episode_len_mean: 447.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 477
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8726422203911675
          entropy_coeff: 0.009999999999999998
          kl: 0.010965560353430506
          policy_loss: -0.026792161787549656
          total_loss: -0.04536733025064071
          vf_explained_var: -0.6205481886863708
          vf_loss: 7.414971411587127e-05
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  iterations_since_restore: 197
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,197,4178.91,197000,0,0,0,447.79




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-09_16-02-05
  done: false
  episode_len_mean: 445.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 480
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.908802690770891
          entropy_coeff: 0.009999999999999998
          kl: 0.012214284082175019
          policy_loss: -0.13064831764333779
          total_loss: -0.14961094717598625
          vf_explained_var: -0.40630102157592773
          vf_loss: 3.951840442621485e-05
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iterations_since_restore: 198
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,198,4214.43,198000,0,0,0,445.22


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-09_16-02-24
  done: false
  episode_len_mean: 446.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 482
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8810038447380066
          entropy_coeff: 0.009999999999999998
          kl: 0.013066297823560764
          policy_loss: 0.046045792433950634
          total_loss: 0.02741672131750319
          vf_explained_var: -0.5966601371765137
          vf_loss: 8.909592595753363e-05
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  iterations_since_restore: 199
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,199,4233.11,199000,0,0,0,446.44


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-09_16-02-43
  done: false
  episode_len_mean: 446.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 485
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9032908797264099
          entropy_coeff: 0.009999999999999998
          kl: 0.01522631794510614
          policy_loss: -0.030869161296221945
          total_loss: -0.049744763142532775
          vf_explained_var: -0.4988439679145813
          vf_loss: 5.024745979527425e-05
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 200
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,200,4252.26,200000,0,0,0,446.76


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-09_16-03-03
  done: false
  episode_len_mean: 445.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 487
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.957079197300805
          entropy_coeff: 0.009999999999999998
          kl: 0.012715363089695755
          policy_loss: 0.09060555953118536
          total_loss: 0.07117613212515911
          vf_explained_var: -0.204570010304451
          vf_loss: 5.195948294082579e-05
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  iterations_since_restore: 201
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,201,4271.82,201000,0,0,0,445.53


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-09_16-03-22
  done: false
  episode_len_mean: 444.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 489
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.8512922949261135
          entropy_coeff: 0.009999999999999998
          kl: 0.011847801668042064
          policy_loss: -0.02051788709229893
          total_loss: -0.03886494098438157
          vf_explained_var: -0.26549702882766724
          vf_loss: 8.256656777424116e-05
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iterations_since_restore: 202
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,202,4290.76,202000,0,0,0,444.04


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-09_16-03-40
  done: false
  episode_len_mean: 443.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 492
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9618671668900385
          entropy_coeff: 0.009999999999999998
          kl: 0.014527583903066018
          policy_loss: -0.09666345096710656
          total_loss: -0.11609283280041482
          vf_explained_var: -0.6560520529747009
          vf_loss: 8.714151196424307e-05
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iterations_since_restore: 203
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,203,4309.16,203000,0,0,0,443.21


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-09_16-03-59
  done: false
  episode_len_mean: 442.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 494
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.925492795308431
          entropy_coeff: 0.009999999999999998
          kl: 0.01478767075092959
          policy_loss: -0.06537467887004217
          total_loss: -0.08438829324311681
          vf_explained_var: -0.5453851819038391
          vf_loss: 0.00013733800644533605
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iterations_since_restore: 204
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,204,4327.92,204000,0,0,0,442.52


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-09_16-04-19
  done: false
  episode_len_mean: 439.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 496
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9356845604048836
          entropy_coeff: 0.009999999999999998
          kl: 0.01269431132127118
          policy_loss: -0.06970117050740454
          total_loss: -0.0889098082565599
          vf_explained_var: -0.828281819820404
          vf_loss: 5.895011009771325e-05
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iterations_since_restore: 205
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,205,4347.78,205000,0,0,0,439.95


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-09_16-04-39
  done: false
  episode_len_mean: 440.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 499
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.71362028254403
          entropy_coeff: 0.009999999999999998
          kl: 0.0262111921348394
          policy_loss: -0.06246107150283125
          total_loss: -0.07933907645444076
          vf_explained_var: -0.276409387588501
          vf_loss: 7.389838000461976e-05
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterations_since_restore: 206
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,206,4368.07,206000,0,0,0,440.29


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-09_16-04-58
  done: false
  episode_len_mean: 438.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 501
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010546874999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9530021945635478
          entropy_coeff: 0.009999999999999998
          kl: 0.041150077852422424
          policy_loss: 0.09366988895667924
          total_loss: 0.07463632623354594
          vf_explained_var: 0.055012691766023636
          vf_loss: 6.245384582952183e-05
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  iterations_since_restore: 207
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,207,4386.55,207000,0,0,0,438.63


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-09_16-05-17
  done: false
  episode_len_mean: 439.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 504
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.908736448817783
          entropy_coeff: 0.009999999999999998
          kl: 0.013632740585534325
          policy_loss: -0.04407530828482575
          total_loss: -0.0628822839508454
          vf_explained_var: -0.4630299508571625
          vf_loss: 6.471304792891412e-05
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 208
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,208,4406.16,208000,0,0,0,439.02


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-09_16-05-35
  done: false
  episode_len_mean: 438.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 506
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.824567511346605
          entropy_coeff: 0.009999999999999998
          kl: 0.013725137726524405
          policy_loss: -0.020400709576076932
          total_loss: -0.03834414436585373
          vf_explained_var: -0.7310506105422974
          vf_loss: 8.510387762928278e-05
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  iterations_since_restore: 209
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,209,4423.96,209000,0,0,0,438.2


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-09_16-05-52
  done: false
  episode_len_mean: 440.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 508
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8376974238289727
          entropy_coeff: 0.009999999999999998
          kl: 0.013792689425340784
          policy_loss: -0.032322968294223146
          total_loss: -0.05040117700894674
          vf_explained_var: -0.6159078478813171
          vf_loss: 8.055993550644618e-05
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore: 210
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,210,4440.69,210000,0,0,0,440.53




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-09_16-06-31
  done: false
  episode_len_mean: 437.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 511
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8281964911354913
          entropy_coeff: 0.009999999999999998
          kl: 0.010926332658646625
          policy_loss: 0.032652597253521286
          total_loss: 0.014651364212234815
          vf_explained_var: -0.9475390911102295
          vf_loss: 0.00010787382117188018
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_since_restore: 211
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,211,4479.63,211000,0,0,0,437.23


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-09_16-06-46
  done: false
  episode_len_mean: 436.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 513
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.899515328142378
          entropy_coeff: 0.009999999999999998
          kl: 0.013533151925082334
          policy_loss: -0.026091988301939435
          total_loss: -0.0447930588076512
          vf_explained_var: -0.028051989153027534
          vf_loss: 7.998727077291631e-05
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  iterations_since_restore: 212
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,212,4494.8,212000,0,0,0,436.4


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-09_16-07-02
  done: false
  episode_len_mean: 435.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 515
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6737702581617566
          entropy_coeff: 0.009999999999999998
          kl: 0.016867509069497987
          policy_loss: -0.092787891253829
          total_loss: -0.1088213471074899
          vf_explained_var: -0.011959163472056389
          vf_loss: 0.0004373979337591057
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  iterations_since_restore: 213
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,213,4510.85,213000,0,0,0,435.59


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-09_16-07-21
  done: false
  episode_len_mean: 433.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 517
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7782210999064976
          entropy_coeff: 0.009999999999999998
          kl: 0.013125885712024714
          policy_loss: -0.06261317332585653
          total_loss: -0.07989625169171227
          vf_explained_var: -0.3616238534450531
          vf_loss: 0.0002914754394926907
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  iterations_since_restore: 214
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,214,4529.84,214000,0,0,0,433.12


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-09_16-07-38
  done: false
  episode_len_mean: 432.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 519
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 2.0012553228272334
          entropy_coeff: 0.009999999999999998
          kl: 0.01489411732658449
          policy_loss: -0.05782570911364423
          total_loss: -0.07752601237346729
          vf_explained_var: -0.6579868197441101
          vf_loss: 7.66209567599516e-05
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iterations_since_restore: 215
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,215,4546.28,215000,0,0,0,432.6


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-09_16-07-59
  done: false
  episode_len_mean: 432.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 522
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.935628314812978
          entropy_coeff: 0.009999999999999998
          kl: 0.013739181160423422
          policy_loss: -0.0329279338940978
          total_loss: -0.0519528969294495
          vf_explained_var: -0.8538214564323425
          vf_loss: 0.0001139624515619491
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterations_since_restore: 216
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,216,4567.3,216000,0,0,0,432.44


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-09_16-08-17
  done: false
  episode_len_mean: 430.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 524
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.990524819162157
          entropy_coeff: 0.009999999999999998
          kl: 0.011085138549099585
          policy_loss: -0.030693195511897406
          total_loss: -0.05036857809043593
          vf_explained_var: -1.0
          vf_loss: 5.449542889740163e-05
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iterations_since_restore: 217
  node_ip: 192.168.3.5
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,217,4585.55,217000,0,0,0,430.78


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-09_16-08-36
  done: false
  episode_len_mean: 429.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 526
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8947503023677401
          entropy_coeff: 0.009999999999999998
          kl: 0.012881879474905588
          policy_loss: -0.019387205503880976
          total_loss: -0.03806681192169587
          vf_explained_var: 0.001984884263947606
          vf_loss: 6.410322215136452e-05
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  iterations_since_restore: 218
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,218,4605.08,218000,0,0,0,429.84


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-09_16-08-56
  done: false
  episode_len_mean: 428.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 529
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.625099515914917
          entropy_coeff: 0.009999999999999998
          kl: 0.010200033994875766
          policy_loss: 0.022160373917884297
          total_loss: 0.00616724801560243
          vf_explained_var: -0.7567110061645508
          vf_loss: 9.650202199635613e-05
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iterations_since_restore: 219
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,219,4624.96,219000,0,0,0,428.53


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-09_16-09-14
  done: false
  episode_len_mean: 428.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 531
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8308948742018805
          entropy_coeff: 0.009999999999999998
          kl: 0.011424716259657231
          policy_loss: -0.045813803974952966
          total_loss: -0.06380494141744243
          vf_explained_var: -0.44819504022598267
          vf_loss: 0.0001370710246394285
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 220
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,220,4642.94,220000,0,0,0,428.35


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-09_16-09-32
  done: false
  episode_len_mean: 427.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 533
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6862361431121826
          entropy_coeff: 0.009999999999999998
          kl: 0.01477821370933964
          policy_loss: -0.06493104952904913
          total_loss: -0.08139310379823049
          vf_explained_var: -0.2737819254398346
          vf_loss: 0.00016651137092493526
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  iterations_since_restore: 221
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,221,4660.35,221000,0,0,0,427.08


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-09_16-09-48
  done: false
  episode_len_mean: 427.15
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 535
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9686066296365525
          entropy_coeff: 0.009999999999999998
          kl: 0.010414632139687423
          policy_loss: -0.032650830017195806
          total_loss: -0.05211217651764552
          vf_explained_var: -0.3459118604660034
          vf_loss: 5.995550994460549e-05
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iterations_since_restore: 222
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,222,4676.95,222000,0,0,0,427.15


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-09_16-10-07
  done: false
  episode_len_mean: 426.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 538
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9995272623168097
          entropy_coeff: 0.009999999999999998
          kl: 0.014543055886723583
          policy_loss: -0.055380827519628736
          total_loss: -0.07505179146925609
          vf_explained_var: -0.8462936282157898
          vf_loss: 9.423310200670838e-05
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  iterations_since_restore: 223
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,223,4695.8,223000,0,0,0,426.94




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-09_16-10-41
  done: false
  episode_len_mean: 426.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 540
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7686997916963365
          entropy_coeff: 0.009999999999999998
          kl: 0.014248342771627698
          policy_loss: 0.017180033152302106
          total_loss: -0.00013157942642768225
          vf_explained_var: -0.3619594871997833
          vf_loss: 0.00014997643148591226
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 224
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,224,4729.95,224000,0,0,0,426.95


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-09_16-11-00
  done: false
  episode_len_mean: 428.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 542
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6837827801704406
          entropy_coeff: 0.009999999999999998
          kl: 0.013211126697478745
          policy_loss: -0.0749515804151694
          total_loss: -0.0912856554819478
          vf_explained_var: -0.5007671117782593
          vf_loss: 0.00029475263808207173
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterations_since_restore: 225
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,225,4748.63,225000,0,0,0,428.92


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-09_16-11-18
  done: false
  episode_len_mean: 429.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 544
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6586836046642728
          entropy_coeff: 0.009999999999999998
          kl: 0.012308723102367056
          policy_loss: -0.04938880507316854
          total_loss: -0.06572578003009161
          vf_explained_var: -0.5495082139968872
          vf_loss: 5.5131603625745306e-05
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations_since_restore: 226
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,226,4766.88,226000,0,0,0,429.84


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-09_16-11-37
  done: false
  episode_len_mean: 429.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 546
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7198659817377726
          entropy_coeff: 0.009999999999999998
          kl: 0.011945381975273487
          policy_loss: -0.05836928677227762
          total_loss: -0.0752888101670477
          vf_explained_var: -0.10501791536808014
          vf_loss: 9.015772674400877e-05
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterations_since_restore: 227
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,227,4785.43,227000,0,0,0,429.32


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-09_16-11-54
  done: false
  episode_len_mean: 428.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 548
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8481740262773303
          entropy_coeff: 0.009999999999999998
          kl: 0.007091720365131475
          policy_loss: -0.10759815014898777
          total_loss: -0.12590023221241103
          vf_explained_var: -0.600493848323822
          vf_loss: 6.746653299261298e-05
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iterations_since_restore: 228
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,228,4802.32,228000,0,0,0,428.73


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-09_16-12-12
  done: false
  episode_len_mean: 430.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 550
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.683923966354794
          entropy_coeff: 0.009999999999999998
          kl: 0.012372305996344016
          policy_loss: -0.06641222101946671
          total_loss: -0.08294580082098643
          vf_explained_var: -0.6006527543067932
          vf_loss: 0.00010992533199088131
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations_since_restore: 229
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,229,4819.95,229000,0,0,0,430.41


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-09_16-12-29
  done: false
  episode_len_mean: 431.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 552
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8264898419380189
          entropy_coeff: 0.009999999999999998
          kl: 0.014963401044627586
          policy_loss: 0.032465206252204046
          total_loss: 0.01449615690443251
          vf_explained_var: -0.08694285899400711
          vf_loss: 5.911979747502806e-05
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations_since_restore: 230
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,230,4837.65,230000,0,0,0,431.29


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-09_16-12-47
  done: false
  episode_len_mean: 432.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 554
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9843426439497205
          entropy_coeff: 0.009999999999999998
          kl: 0.019343838680107143
          policy_loss: 0.037546875948707266
          total_loss: 0.018110471467177074
          vf_explained_var: -0.3980475962162018
          vf_loss: 0.00010099571211160057
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iterations_since_restore: 231
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,231,4855.09,231000,0,0,0,432.62


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-09_16-13-04
  done: false
  episode_len_mean: 435.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 556
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.959283471107483
          entropy_coeff: 0.009999999999999998
          kl: 0.01198246124267866
          policy_loss: 0.003483214146561093
          total_loss: -0.015784028006924522
          vf_explained_var: -0.6542240381240845
          vf_loss: 0.0001360283668165923
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations_since_restore: 232
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,232,4871.97,232000,0,0,0,435.29


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-09_16-13-19
  done: false
  episode_len_mean: 438.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 558
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6633772015571595
          entropy_coeff: 0.009999999999999998
          kl: 0.014409007967616677
          policy_loss: -0.10240708059734768
          total_loss: -0.11870223585930136
          vf_explained_var: -0.04194872826337814
          vf_loss: 0.00011065762494884741
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  iterations_since_restore: 233
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,233,4887.02,233000,0,0,0,438.28


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-09_16-13-37
  done: false
  episode_len_mean: 440.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 560
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8211749500698513
          entropy_coeff: 0.009999999999999998
          kl: 0.01135435053430967
          policy_loss: -0.03158805337217119
          total_loss: -0.049454546595613165
          vf_explained_var: -0.1791807860136032
          vf_loss: 0.0001656276557898511
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  iterations_since_restore: 234
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,234,4904.92,234000,0,0,0,440.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-09_16-13-54
  done: false
  episode_len_mean: 443.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 562
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8964116507106357
          entropy_coeff: 0.009999999999999998
          kl: 0.010179654798137743
          policy_loss: -0.04079553716712528
          total_loss: -0.059486648150616224
          vf_explained_var: -0.7886021733283997
          vf_loss: 0.00011195928859201054
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  iterations_since_restore: 235
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,235,4921.97,235000,0,0,0,443.41


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-09_16-14-08
  done: false
  episode_len_mean: 444.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 563
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 2.0000445697042677
          entropy_coeff: 0.009999999999999998
          kl: 0.010301567281865716
          policy_loss: -0.006456945091485977
          total_loss: -0.026162286475300788
          vf_explained_var: -0.4707740247249603
          vf_loss: 0.0001321286986947396
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  iterations_since_restore: 236
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,236,4936.22,236000,0,0,0,444.57


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-09_16-14-25
  done: false
  episode_len_mean: 448.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 565
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9848913894759284
          entropy_coeff: 0.009999999999999998
          kl: 0.011491020693791161
          policy_loss: 0.019860043418076304
          total_loss: 0.0002928300450245539
          vf_explained_var: -0.831053614616394
          vf_loss: 9.990866464148793e-05
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  iterations_since_restore: 237
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,237,4952.84,237000,0,0,0,448.07


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-09_16-14-41
  done: false
  episode_len_mean: 450.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 567
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9888832714822557
          entropy_coeff: 0.009999999999999998
          kl: 0.014234767506450445
          policy_loss: -0.0050998510585890874
          total_loss: -0.02463931060499615
          vf_explained_var: -0.5854783654212952
          vf_loss: 0.00012417510442901402
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
  iterations_since_restore: 238
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,238,4968.99,238000,0,0,0,450.53




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-09_16-15-15
  done: false
  episode_len_mean: 451.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 570
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.807257138358222
          entropy_coeff: 0.009999999999999998
          kl: 0.011858586611617422
          policy_loss: -0.010942975514464909
          total_loss: -0.02863820923699273
          vf_explained_var: -0.2641127407550812
          vf_loss: 0.0001897329207470951
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  iterations_since_restore: 239
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,239,5003.58,239000,0,0,0,451.6


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-09_16-15-33
  done: false
  episode_len_mean: 453.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 571
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9124950488408408
          entropy_coeff: 0.009999999999999998
          kl: 0.013398751812314636
          policy_loss: 0.08626539980371793
          total_loss: 0.06743445263968574
          vf_explained_var: -0.29301780462265015
          vf_loss: 8.202952562391551e-05
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 240
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,240,5021.16,240000,0,0,0,453.48


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-09_16-15-51
  done: false
  episode_len_mean: 455.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 574
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.926882070965237
          entropy_coeff: 0.009999999999999998
          kl: 0.011953601670470625
          policy_loss: -0.020512397297554545
          total_loss: -0.03940101845396889
          vf_explained_var: -0.7818788290023804
          vf_loss: 0.00019109069891985403
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  iterations_since_restore: 241
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,241,5039.4,241000,0,0,0,455.12


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-09_16-16-12
  done: false
  episode_len_mean: 454.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 576
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6141456524531046
          entropy_coeff: 0.009999999999999998
          kl: 0.011919216662088615
          policy_loss: -0.027601496875286104
          total_loss: -0.04319681508673562
          vf_explained_var: 0.3551444113254547
          vf_loss: 0.0003575706508450417
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iterations_since_restore: 242
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,242,5059.74,242000,0,0,0,454.79


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-09_16-16-30
  done: false
  episode_len_mean: 456.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 578
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7173328889740838
          entropy_coeff: 0.009999999999999998
          kl: 0.010478448108125744
          policy_loss: -0.09799484900302358
          total_loss: -0.11480062819189496
          vf_explained_var: -0.6579020619392395
          vf_loss: 0.0002017762047924205
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  iterations_since_restore: 243
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,243,5078.57,243000,0,0,0,456.67


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-09_16-16-48
  done: false
  episode_len_mean: 458.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 580
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.9210947632789612
          entropy_coeff: 0.009999999999999998
          kl: 0.0085365788523605
          policy_loss: -0.08037451584306028
          total_loss: -0.09931073095649481
          vf_explained_var: -0.4049414396286011
          vf_loss: 0.0001396774156698181
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  iterations_since_restore: 244
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,244,5096.5,244000,0,0,0,458.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-09_16-17-05
  done: false
  episode_len_mean: 459.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 582
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.930101290014055
          entropy_coeff: 0.009999999999999998
          kl: 0.01845555813031368
          policy_loss: -0.0029569020908739832
          total_loss: -0.021752453222870827
          vf_explained_var: -0.06204622983932495
          vf_loss: 0.0002134907090900621
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
  iterations_since_restore: 245
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,245,5112.94,245000,0,0,0,459.23


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-09_16-17-22
  done: false
  episode_len_mean: 461.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 584
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7892523010571797
          entropy_coeff: 0.009999999999999998
          kl: 0.011269390636909155
          policy_loss: 0.0070603083198269205
          total_loss: -0.010473277895814842
          vf_explained_var: -0.814426839351654
          vf_loss: 0.00018064902574729382
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  iterations_since_restore: 246
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,246,5130.17,246000,0,0,0,461.72


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-09_16-17-43
  done: false
  episode_len_mean: 461.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 587
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8941306233406068
          entropy_coeff: 0.009999999999999998
          kl: 0.013181490270750117
          policy_loss: 0.0027231626626518035
          total_loss: -0.01572588843603929
          vf_explained_var: -0.388994425535202
          vf_loss: 0.00028372164346769245
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
  iterations_since_restore: 247
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,247,5150.56,247000,0,0,0,461.66


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-09_16-18-02
  done: false
  episode_len_mean: 462.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 589
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6787773013114928
          entropy_coeff: 0.009999999999999998
          kl: 0.010514450858991293
          policy_loss: -0.044815718341204853
          total_loss: -0.061266300744480556
          vf_explained_var: -0.6657524704933167
          vf_loss: 0.00017084803338447173
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  iterations_since_restore: 248
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,248,5170.02,248000,0,0,0,462.52


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-09_16-18-20
  done: false
  episode_len_mean: 463.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 591
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8257441692882115
          entropy_coeff: 0.009999999999999998
          kl: 0.012670416957400477
          policy_loss: -0.05912998773985439
          total_loss: -0.07701842474440733
          vf_explained_var: -0.9775105118751526
          vf_loss: 0.00016855713515219071
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  iterations_since_restore: 249
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,249,5188.34,249000,0,0,0,463.69


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-09_16-18-41
  done: false
  episode_len_mean: 463.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 593
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 2.0227650350994533
          entropy_coeff: 0.009999999999999998
          kl: 0.01746968537259202
          policy_loss: -0.013811878114938735
          total_loss: -0.033625616298781504
          vf_explained_var: -0.9616786241531372
          vf_loss: 0.000137537366713837
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations_since_restore: 250
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,250,5209.04,250000,0,0,0,463.73


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-09_16-18-59
  done: false
  episode_len_mean: 463.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 595
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.875073258082072
          entropy_coeff: 0.009999999999999998
          kl: 0.019585124289761384
          policy_loss: -0.07849961833821403
          total_loss: -0.09678131143252054
          vf_explained_var: -0.1378510743379593
          vf_loss: 0.0001591964236771067
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
  iterations_since_restore: 251
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,251,5227.35,251000,0,0,0,463.87


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-09_16-19-14
  done: false
  episode_len_mean: 468.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 597
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6604021112124125
          entropy_coeff: 0.009999999999999998
          kl: 0.018759352482174644
          policy_loss: -0.001651905911664168
          total_loss: -0.017927670064899655
          vf_explained_var: -0.705698549747467
          vf_loss: 3.148078686131663e-05
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  iterations_since_restore: 252
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,252,5242.34,252000,0,0,0,468.8


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-09_16-19-33
  done: false
  episode_len_mean: 471.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 599
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.975137264198727
          entropy_coeff: 0.009999999999999998
          kl: 0.01357084645410901
          policy_loss: -0.026706447783443662
          total_loss: -0.04607828340182702
          vf_explained_var: -0.5030237436294556
          vf_loss: 0.00016484317485365965
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
  iterations_since_restore: 253
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,253,5261.24,253000,0,0,0,471.65




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-09_16-20-07
  done: false
  episode_len_mean: 473.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 601
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7718210445510016
          entropy_coeff: 0.009999999999999998
          kl: 0.023429944168441193
          policy_loss: -0.06452230083652669
          total_loss: -0.08176983080597387
          vf_explained_var: -0.6326974630355835
          vf_loss: 0.00010001325651450719
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
  iterations_since_restore: 254
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,254,5295.06,254000,0,0,0,473.71


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-09_16-20-25
  done: false
  episode_len_mean: 475.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 603
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.85129351483451
          entropy_coeff: 0.009999999999999998
          kl: 0.012499798116251236
          policy_loss: -0.07407031978170077
          total_loss: -0.09217831128173404
          vf_explained_var: -0.66136634349823
          vf_loss: 0.00010831794109738742
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  iterations_since_restore: 255
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,255,5313.22,255000,0,0,0,475


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-09_16-20-41
  done: false
  episode_len_mean: 476.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 604
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8340713964568245
          entropy_coeff: 0.009999999999999998
          kl: 0.010968696092820135
          policy_loss: -0.035929204440779156
          total_loss: -0.053958894312381746
          vf_explained_var: -1.0
          vf_loss: 5.0733081338370945e-05
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  iterations_since_restore: 256
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,256,5329.27,256000,0,0,0,476.87


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-09_16-20-59
  done: false
  episode_len_mean: 480.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 606
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7396918058395385
          entropy_coeff: 0.009999999999999998
          kl: 0.010613461461680017
          policy_loss: -0.016869356917838255
          total_loss: -0.03391709617442555
          vf_explained_var: -0.390960693359375
          vf_loss: 9.73168492540329e-05
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
  iterations_since_restore: 257
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,257,5347.09,257000,0,0,0,480.07


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-09_16-21-18
  done: false
  episode_len_mean: 480.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 608
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8266860418849522
          entropy_coeff: 0.009999999999999998
          kl: 0.010669259931353479
          policy_loss: 0.02681907872772879
          total_loss: 0.008869834989309312
          vf_explained_var: -0.8796023726463318
          vf_loss: 6.442685864587677e-05
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000
  iterations_since_restore: 258
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,258,5365.78,258000,0,0,0,480.17


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-09_16-21-38
  done: false
  episode_len_mean: 482.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 610
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8467036339971754
          entropy_coeff: 0.009999999999999998
          kl: 0.011791060501681066
          policy_loss: -0.026248664243353738
          total_loss: -0.044330182692242995
          vf_explained_var: -0.9443789124488831
          vf_loss: 0.00010570882582720111
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000
  iterations_since_restore: 259
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,259,5386.07,259000,0,0,0,482.59


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-09_16-21-59
  done: false
  episode_len_mean: 481.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 613
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8704504635598924
          entropy_coeff: 0.009999999999999998
          kl: 0.01216972918058664
          policy_loss: 0.02073248161209954
          total_loss: 0.0024448777238527935
          vf_explained_var: -0.9103450179100037
          vf_loss: 0.0001281067369240595
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 260
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,260,5406.63,260000,0,0,0,481.39


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-09_16-22-20
  done: false
  episode_len_mean: 479.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 615
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7749973297119142
          entropy_coeff: 0.009999999999999998
          kl: 0.013103815599628671
          policy_loss: -0.03803403170572387
          total_loss: -0.055308235519462165
          vf_explained_var: -0.8457498550415039
          vf_loss: 0.00016481039937288087
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000
  iterations_since_restore: 261
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,261,5427.92,261000,0,0,0,479.35


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-09_16-22-42
  done: false
  episode_len_mean: 480.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 617
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.814703851275974
          entropy_coeff: 0.009999999999999998
          kl: 0.013214241662799017
          policy_loss: -0.044037393977244696
          total_loss: -0.06168481748965052
          vf_explained_var: -0.9153910279273987
          vf_loss: 0.0001860343471182407
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000
  iterations_since_restore: 262
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,262,5449.65,262000,0,0,0,480.26


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-09_16-23-02
  done: false
  episode_len_mean: 479.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 619
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9321239405208164
          entropy_coeff: 0.009999999999999998
          kl: 0.01139663114373418
          policy_loss: -0.024019997235801484
          total_loss: -0.042958527968989475
          vf_explained_var: -0.9998120069503784
          vf_loss: 0.00011225933097496939
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000
  iterations_since_restore: 263
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,263,5470.12,263000,0,0,0,479.51


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-09_16-23-24
  done: false
  episode_len_mean: 480.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 622
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8186872826682197
          entropy_coeff: 0.009999999999999998
          kl: 0.014604899241264391
          policy_loss: -0.022814901607731978
          total_loss: -0.04056578775246938
          vf_explained_var: -0.46842190623283386
          vf_loss: 8.940717430555701e-05
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  iterations_since_restore: 264
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,264,5491.52,264000,0,0,0,480.04


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-09_16-23-47
  done: false
  episode_len_mean: 479.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 624
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9640670352511935
          entropy_coeff: 0.009999999999999998
          kl: 0.010019505226687405
          policy_loss: -0.023202566926678023
          total_loss: -0.042518485875593294
          vf_explained_var: -0.6959994435310364
          vf_loss: 8.698698167993118e-05
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
  iterations_since_restore: 265
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,265,5514.41,265000,0,0,0,479.22


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-09_16-24-07
  done: false
  episode_len_mean: 479.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 626
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6982089890374077
          entropy_coeff: 0.009999999999999998
          kl: 0.009346295050301616
          policy_loss: 0.07660186969571643
          total_loss: 0.0599007124081254
          vf_explained_var: 0.06599447131156921
          vf_loss: 5.914197940405251e-05
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
  iterations_since_restore: 266
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,266,5534.41,266000,0,0,0,479.78


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-09_16-24-28
  done: false
  episode_len_mean: 481.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 629
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8562223792076111
          entropy_coeff: 0.009999999999999998
          kl: 0.014250371689814934
          policy_loss: -0.07390292642845048
          total_loss: -0.09204037876592742
          vf_explained_var: -0.5268504023551941
          vf_loss: 8.66014271398146e-05
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000
  iterations_since_restore: 267
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,267,5556.01,267000,0,0,0,481.02




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-09_16-25-07
  done: false
  episode_len_mean: 479.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 631
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.797497652636634
          entropy_coeff: 0.009999999999999998
          kl: 0.013398607141390784
          policy_loss: 0.0030636784310142198
          total_loss: -0.014551288696626823
          vf_explained_var: -0.5980709791183472
          vf_loss: 4.205331780313928e-05
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  iterations_since_restore: 268
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,268,5595.06,268000,0,0,0,479.59


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-09_16-25-26
  done: false
  episode_len_mean: 480.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 633
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7132478793462118
          entropy_coeff: 0.009999999999999998
          kl: 0.024335060967988653
          policy_loss: 0.04420688268211153
          total_loss: 0.08323970387379329
          vf_explained_var: -0.6282535791397095
          vf_loss: 0.05558781295419774
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000
  iterations_since_restore: 269
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,269,5613.94,269000,-0.03,0,-3,480.61


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-09_16-25-46
  done: false
  episode_len_mean: 478.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 636
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7244366142484877
          entropy_coeff: 0.009999999999999998
          kl: 0.012335136623984057
          policy_loss: 0.08892283729381031
          total_loss: 0.07406869812144173
          vf_explained_var: 0.19350437819957733
          vf_loss: 0.0019511487904108234
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  iterations_since_restore: 270
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,270,5633.68,270000,-0.03,0,-3,478.48


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-09_16-26-05
  done: false
  episode_len_mean: 478.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 638
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.728412351343367
          entropy_coeff: 0.009999999999999998
          kl: 0.01262425012308515
          policy_loss: -0.005568623377217187
          total_loss: -0.02194612692627642
          vf_explained_var: 0.1400679498910904
          vf_loss: 0.00045724712318689047
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  iterations_since_restore: 271
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,271,5652.99,271000,-0.03,0,-3,478.52


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-09_16-26-26
  done: false
  episode_len_mean: 478.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 640
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7393496751785278
          entropy_coeff: 0.009999999999999998
          kl: 0.011187083015541353
          policy_loss: -0.08218586602144771
          total_loss: -0.09884938854310248
          vf_explained_var: -0.3382240831851959
          vf_loss: 0.00033176631542220197
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  iterations_since_restore: 272
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,272,5673.39,272000,-0.03,0,-3,478


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-09_16-26-49
  done: false
  episode_len_mean: 474.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 643
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7620482206344605
          entropy_coeff: 0.009999999999999998
          kl: 0.01481723559862276
          policy_loss: 0.015559048453966776
          total_loss: -0.0010980096128251817
          vf_explained_var: -0.041552748531103134
          vf_loss: 0.0004359915165372917
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
  iterations_since_restore: 273
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,273,5696.21,273000,-0.03,0,-3,474.83


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-09_16-27-07
  done: false
  episode_len_mean: 475.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 645
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7365757279925875
          entropy_coeff: 0.009999999999999998
          kl: 0.00897100876041039
          policy_loss: -0.059762602051099144
          total_loss: -0.07659109731515248
          vf_explained_var: -0.21614906191825867
          vf_loss: 0.0002179306336782045
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  iterations_since_restore: 274
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,274,5714.81,274000,-0.03,0,-3,475.11


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-09_16-27-29
  done: false
  episode_len_mean: 474.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 647
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8452466527620952
          entropy_coeff: 0.009999999999999998
          kl: 0.009709999286284014
          policy_loss: 0.12298727904756863
          total_loss: 0.10508299767971038
          vf_explained_var: -0.17100222408771515
          vf_loss: 0.00020255139243090524
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
  iterations_since_restore: 275
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,275,5736.11,275000,-0.03,0,-3,474.22


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-09_16-27-49
  done: false
  episode_len_mean: 471.8
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 650
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7554155548413595
          entropy_coeff: 0.009999999999999998
          kl: 0.012772061303358465
          policy_loss: -0.023366337662769687
          total_loss: -0.04025607160809967
          vf_explained_var: -0.8708781599998474
          vf_loss: 0.0002097934578260821
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
  iterations_since_restore: 276
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,276,5756.51,276000,-0.03,0,-3,471.8


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-09_16-28-09
  done: false
  episode_len_mean: 471.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 652
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8080695046318902
          entropy_coeff: 0.009999999999999998
          kl: 0.017150033173899615
          policy_loss: 0.06845188699662685
          total_loss: 0.051112689036462045
          vf_explained_var: -0.260018527507782
          vf_loss: 0.00013102957673254422
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
  iterations_since_restore: 277
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,277,5776.79,277000,-0.03,0,-3,471.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-09_16-28-30
  done: false
  episode_len_mean: 469.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 654
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7958305120468139
          entropy_coeff: 0.009999999999999998
          kl: 0.011305348599463055
          policy_loss: -0.12138333171606064
          total_loss: -0.13877547399865256
          vf_explained_var: -0.016216658055782318
          vf_loss: 0.00016374305972324994
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000
  iterations_since_restore: 278
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,278,5797.79,278000,-0.03,0,-3,469.67


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-09_16-28-50
  done: false
  episode_len_mean: 466.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 657
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6834415780173408
          entropy_coeff: 0.009999999999999998
          kl: 0.014886358411112985
          policy_loss: 0.014326235486401452
          total_loss: -0.0018543654017978244
          vf_explained_var: -0.2555370032787323
          vf_loss: 0.00012391946472942235
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
  iterations_since_restore: 279
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,279,5817.39,279000,-0.03,0,-3,466.95


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-09_16-29-11
  done: false
  episode_len_mean: 465.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 659
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8486768060260348
          entropy_coeff: 0.009999999999999998
          kl: 0.010528970024260904
          policy_loss: -0.006537346293528875
          total_loss: -0.024509285390377045
          vf_explained_var: -0.9886908531188965
          vf_loss: 0.00014003914345974206
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 280
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,280,5838.04,280000,-0.03,0,-3,465.13




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-09_16-29-48
  done: false
  episode_len_mean: 463.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 661
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7387938804096645
          entropy_coeff: 0.009999999999999998
          kl: 0.01177585477584061
          policy_loss: -0.05750272509952386
          total_loss: -0.07438385751512315
          vf_explained_var: -0.024929529055953026
          vf_loss: 8.763698691230578e-05
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
  iterations_since_restore: 281
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,281,5875.04,281000,-0.03,0,-3,463.39


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-09_16-30-08
  done: false
  episode_len_mean: 459.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 664
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8147431320614285
          entropy_coeff: 0.009999999999999998
          kl: 0.01411173323675854
          policy_loss: -0.0948322772151894
          total_loss: -0.11234094810982545
          vf_explained_var: -0.1074923500418663
          vf_loss: 0.0001364429907375274
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000
  iterations_since_restore: 282
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,282,5895.28,282000,-0.03,0,-3,459.03


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-09_16-30-28
  done: false
  episode_len_mean: 456.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 666
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.722153079509735
          entropy_coeff: 0.009999999999999998
          kl: 0.011041415348436818
          policy_loss: 0.011536445944673485
          total_loss: -0.005205408359567325
          vf_explained_var: -0.3267260789871216
          vf_loss: 8.664750732552622e-05
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  iterations_since_restore: 283
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,283,5914.88,283000,-0.03,0,-3,456.74


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-09_16-30-48
  done: false
  episode_len_mean: 454.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 668
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7528382963604396
          entropy_coeff: 0.009999999999999998
          kl: 0.01737678968215819
          policy_loss: -0.05491688855820232
          total_loss: -0.07176703363656997
          vf_explained_var: -0.6383150815963745
          vf_loss: 5.969776304684476e-05
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  iterations_since_restore: 284
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,284,5935.19,284000,-0.03,0,-3,454.81


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-09_16-31-11
  done: false
  episode_len_mean: 451.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 671
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5851265867551168
          entropy_coeff: 0.009999999999999998
          kl: 0.010724277062000543
          policy_loss: -0.059333109358946486
          total_loss: -0.0746040197296275
          vf_explained_var: -0.3869033753871918
          vf_loss: 0.0001986176361646762
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  iterations_since_restore: 285
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,285,5957.84,285000,-0.03,0,-3,451.88


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-09_16-31-32
  done: false
  episode_len_mean: 450.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 674
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.731181553999583
          entropy_coeff: 0.009999999999999998
          kl: 0.012573915526452062
          policy_loss: -0.02904113084077835
          total_loss: -0.04579896421896087
          vf_explained_var: -0.9581840634346008
          vf_loss: 0.00010640633022881552
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  iterations_since_restore: 286
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,286,5978.97,286000,-0.03,0,-3,450.18


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-09_16-31-54
  done: false
  episode_len_mean: 449.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 676
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6568456994162666
          entropy_coeff: 0.009999999999999998
          kl: 0.009635644906846885
          policy_loss: -0.1073537777695391
          total_loss: -0.12345456158121428
          vf_explained_var: -0.38142597675323486
          vf_loss: 0.00012468326905137574
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000
  iterations_since_restore: 287
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,287,6001.1,287000,-0.03,0,-3,449.76


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-09_16-32-15
  done: false
  episode_len_mean: 447.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 679
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7492978201972114
          entropy_coeff: 0.009999999999999998
          kl: 0.011039843676925109
          policy_loss: 0.1176256155802144
          total_loss: 0.10058641185363133
          vf_explained_var: -0.9877239465713501
          vf_loss: 6.080282098789919e-05
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000
  iterations_since_restore: 288
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,288,6022.6,288000,-0.03,0,-3,447.71


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-09_16-32-37
  done: false
  episode_len_mean: 446.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 681
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.855860353840722
          entropy_coeff: 0.009999999999999998
          kl: 0.013795856859170305
          policy_loss: 0.028222518745395872
          total_loss: 0.01019872741566764
          vf_explained_var: -0.8579778075218201
          vf_loss: 4.373816289242111e-05
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000
  iterations_since_restore: 289
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,289,6043.79,289000,-0.03,0,-3,446.87


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-09_16-32-55
  done: false
  episode_len_mean: 445.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 683
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7283424933751423
          entropy_coeff: 0.009999999999999998
          kl: 0.015786839815288674
          policy_loss: -0.07100525663958655
          total_loss: -0.08766738325357437
          vf_explained_var: -0.3510124683380127
          vf_loss: 5.9356018725035634e-05
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  iterations_since_restore: 290
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,290,6062,290000,-0.03,0,-3,445.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-09_16-33-15
  done: false
  episode_len_mean: 444.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 685
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7076087766223484
          entropy_coeff: 0.009999999999999998
          kl: 0.014158188912150196
          policy_loss: -0.08117862459686068
          total_loss: -0.09764857391516367
          vf_explained_var: -0.5592575073242188
          vf_loss: 0.00010216710240153285
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
  iterations_since_restore: 291
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,291,6082.18,291000,-0.03,0,-3,444.91


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-09_16-33-31
  done: false
  episode_len_mean: 446.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 687
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7014295975367228
          entropy_coeff: 0.009999999999999998
          kl: 0.012774296720608388
          policy_loss: -0.04314078804519442
          total_loss: -0.059676288151078755
          vf_explained_var: -0.23752649128437042
          vf_loss: 2.4084395797924825e-05
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  iterations_since_restore: 292
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,292,6098.61,292000,-0.03,0,-3,446.88


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-09_16-33-49
  done: false
  episode_len_mean: 447.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 689
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.559748281372918
          entropy_coeff: 0.009999999999999998
          kl: 0.00950505151358092
          policy_loss: -0.011258359936376413
          total_loss: -0.026468278591831526
          vf_explained_var: -0.9774453043937683
          vf_loss: 4.922655368015209e-05
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  iterations_since_restore: 293
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,293,6116.23,293000,-0.03,0,-3,447.36




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-09_16-34-21
  done: false
  episode_len_mean: 449.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 691
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5481203979916043
          entropy_coeff: 0.009999999999999998
          kl: 0.01158375347171693
          policy_loss: 0.006999898350073232
          total_loss: -0.008024490169352956
          vf_explained_var: -0.999451220035553
          vf_loss: 4.4482677326919255e-05
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000
  iterations_since_restore: 294
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,294,6147.67,294000,-0.03,0,-3,449.48


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-09_16-34-36
  done: false
  episode_len_mean: 452.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 693
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8427776045269437
          entropy_coeff: 0.009999999999999998
          kl: 0.020478435631687948
          policy_loss: -0.004388086270127031
          total_loss: -0.02204256947669718
          vf_explained_var: -0.6586599349975586
          vf_loss: 4.434577882016634e-05
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000
  iterations_since_restore: 295
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,295,6162.75,295000,-0.03,0,-3,452.92


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-09_16-34-54
  done: false
  episode_len_mean: 453.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 695
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9032308671209548
          entropy_coeff: 0.009999999999999998
          kl: 0.0114858402513737
          policy_loss: -0.027899293932649824
          total_loss: -0.04624516152673298
          vf_explained_var: -0.21140684187412262
          vf_loss: 7.317312786957094e-05
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  iterations_since_restore: 296
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,296,6180.95,296000,-0.03,0,-3,453.64


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-09_16-35-11
  done: false
  episode_len_mean: 451.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 696
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9167507237858243
          entropy_coeff: 0.009999999999999998
          kl: 0.010992712475730797
          policy_loss: -0.05215156342213353
          total_loss: -0.0706325536283354
          vf_explained_var: -1.0
          vf_loss: 9.957492030581408e-05
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000
  iterations_since_restore: 297
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,297,6198.49,297000,-0.03,0,-3,451.94


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-09_16-35-28
  done: false
  episode_len_mean: 452.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 698
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9173453278011745
          entropy_coeff: 0.009999999999999998
          kl: 0.010351811161008268
          policy_loss: 0.03496697054555019
          total_loss: 0.016443775708062783
          vf_explained_var: -1.0
          vf_loss: 9.753849541690821e-05
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000
  iterations_since_restore: 298
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,298,6214.62,298000,-0.03,0,-3,452.65


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-09_16-35-46
  done: false
  episode_len_mean: 453.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 700
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9617960625224644
          entropy_coeff: 0.009999999999999998
          kl: 0.013825898219485013
          policy_loss: -0.05271392501890659
          total_loss: -0.07143373141686121
          vf_explained_var: -0.9336583614349365
          vf_loss: 0.00015993973534528374
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
  iterations_since_restore: 299
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,299,6232.91,299000,-0.03,0,-3,453.58


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-09_16-36-03
  done: false
  episode_len_mean: 452.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 702
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9745875385072496
          entropy_coeff: 0.009999999999999998
          kl: 0.013399820029412663
          policy_loss: -0.0830832451581955
          total_loss: -0.10200343529383342
          vf_explained_var: -0.981566846370697
          vf_loss: 0.00011022228657869467
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 300
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,300,6249.91,300000,-0.03,0,-3,452.24


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-09_16-36-20
  done: false
  episode_len_mean: 451.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 704
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 2.012256047460768
          entropy_coeff: 0.009999999999999998
          kl: 0.008312773395254899
          policy_loss: 0.007238453295495775
          total_loss: -0.012338242638442251
          vf_explained_var: -0.8250820636749268
          vf_loss: 0.0001020150218310947
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000
  iterations_since_restore: 301
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,301,6266.95,301000,-0.03,0,-3,451.48


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-09_16-36-36
  done: false
  episode_len_mean: 450.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 706
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9707373552852208
          entropy_coeff: 0.009999999999999998
          kl: 0.009798552062986686
          policy_loss: -0.031744682581888305
          total_loss: -0.050830450902382536
          vf_explained_var: -1.0
          vf_loss: 9.842664730967954e-05
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  iterations_since_restore: 302
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,302,6283.25,302000,-0.03,0,-3,450.74


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-09_16-36-55
  done: false
  episode_len_mean: 451.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 708
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9727089073922899
          entropy_coeff: 0.009999999999999998
          kl: 0.01978148977403406
          policy_loss: -0.07683501508500841
          total_loss: -0.09541928426673014
          vf_explained_var: -0.8778991103172302
          vf_loss: 8.66140964313268e-05
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  iterations_since_restore: 303
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,303,6301.57,303000,-0.03,0,-3,451.44


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-09_16-37-12
  done: false
  episode_len_mean: 451.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 710
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 2.0706976426972283
          entropy_coeff: 0.009999999999999998
          kl: 0.008591346567838906
          policy_loss: 0.009848197259836726
          total_loss: -0.010328922586308586
          vf_explained_var: -0.9260637164115906
          vf_loss: 7.112966316829746e-05
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  iterations_since_restore: 304
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,304,6319.26,304000,-0.03,0,-3,451.95


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-09_16-37-30
  done: false
  episode_len_mean: 452.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 712
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9375900758637323
          entropy_coeff: 0.009999999999999998
          kl: 0.013694150860870459
          policy_loss: -0.086257214181953
          total_loss: -0.10479407029019462
          vf_explained_var: 0.007767815142869949
          vf_loss: 0.00010786232370365824
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
  iterations_since_restore: 305
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,305,6336.85,305000,-0.03,0,-3,452.06


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-09_16-37-48
  done: false
  episode_len_mean: 453.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 714
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.858858315149943
          entropy_coeff: 0.009999999999999998
          kl: 0.01209507076755102
          policy_loss: -0.1058311296833886
          total_loss: -0.12364907827642228
          vf_explained_var: 0.22974848747253418
          vf_loss: 0.00012483273409695053
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  iterations_since_restore: 306
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,306,6354.62,306000,-0.03,0,-3,453.3


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-09_16-38-06
  done: false
  episode_len_mean: 454.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 716
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.814637593428294
          entropy_coeff: 0.009999999999999998
          kl: 0.012118946867964218
          policy_loss: -0.10113286905818515
          total_loss: -0.11852783610423406
          vf_explained_var: 0.04719946160912514
          vf_loss: 0.00010433178605227214
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  iterations_since_restore: 307
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,307,6372.8,307000,-0.03,0,-3,454.61


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-09_16-38-23
  done: false
  episode_len_mean: 456.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 718
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.953978799449073
          entropy_coeff: 0.009999999999999998
          kl: 0.014986673398901276
          policy_loss: -0.046617322911818825
          total_loss: -0.06524943419628673
          vf_explained_var: -0.20591531693935394
          vf_loss: 0.00010748043866139293
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
  iterations_since_restore: 308
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,308,6390.18,308000,-0.03,0,-3,456.23




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-09_16-38-58
  done: false
  episode_len_mean: 455.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 720
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 2.0710043165418837
          entropy_coeff: 0.009999999999999998
          kl: 0.011052629149492076
          policy_loss: 0.015666315683888064
          total_loss: -0.004418338504102495
          vf_explained_var: -0.7763226628303528
          vf_loss: 3.524892603713346e-05
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
  iterations_since_restore: 309
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,309,6425.2,309000,-0.03,0,-3,455.51


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-09_16-39-18
  done: false
  episode_len_mean: 456.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 722
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.892886209487915
          entropy_coeff: 0.009999999999999998
          kl: 0.010784519324771619
          policy_loss: -0.01769907642155886
          total_loss: -0.03591440547671583
          vf_explained_var: -0.9062246680259705
          vf_loss: 0.00013770655655712794
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  iterations_since_restore: 310
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,310,6445.05,310000,-0.03,0,-3,456.84


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-09_16-39-36
  done: false
  episode_len_mean: 458.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 724
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8786634617381626
          entropy_coeff: 0.009999999999999998
          kl: 0.0137945026638109
          policy_loss: -0.07666832841932773
          total_loss: -0.09460805476539665
          vf_explained_var: -1.0
          vf_loss: 0.00011036982282853892
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
  iterations_since_restore: 311
  node_ip: 192.168.3.5
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,311,6463.13,311000,-0.03,0,-3,458.36


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-09_16-39-53
  done: false
  episode_len_mean: 459.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 726
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8688854058583577
          entropy_coeff: 0.009999999999999998
          kl: 0.009018062193285084
          policy_loss: -0.05332535128626559
          total_loss: -0.07138010267582205
          vf_explained_var: -0.7777732610702515
          vf_loss: 0.00015259553850531423
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  iterations_since_restore: 312
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,312,6479.85,312000,-0.03,0,-3,459.3


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-09_16-40-11
  done: false
  episode_len_mean: 460.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 728
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9119883073700799
          entropy_coeff: 0.009999999999999998
          kl: 0.010759755325401767
          policy_loss: -0.0014909416230188476
          total_loss: -0.0199398181711634
          vf_explained_var: -0.9938287734985352
          vf_loss: 9.650435030784542e-05
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000
  iterations_since_restore: 313
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,313,6497.79,313000,-0.03,0,-3,460.4


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-09_16-40-31
  done: false
  episode_len_mean: 461.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 731
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8250386118888855
          entropy_coeff: 0.009999999999999998
          kl: 0.015063881229252176
          policy_loss: -0.010006088163289758
          total_loss: -0.0273088240582082
          vf_explained_var: -0.38388511538505554
          vf_loss: 0.00014333534313158857
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000
  iterations_since_restore: 314
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,314,6517.51,314000,-0.03,0,-3,461.63


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-09_16-40-52
  done: false
  episode_len_mean: 459.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 733
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8078969730271233
          entropy_coeff: 0.009999999999999998
          kl: 0.008672383001423018
          policy_loss: 0.019778422721558147
          total_loss: 0.0022620033472776414
          vf_explained_var: -0.8444816470146179
          vf_loss: 9.950058766763605e-05
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  iterations_since_restore: 315
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,315,6538.93,315000,0,0,0,459.98


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-09_16-41-12
  done: false
  episode_len_mean: 459.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 736
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8225158267550998
          entropy_coeff: 0.009999999999999998
          kl: 0.011513678870263987
          policy_loss: -0.05336193175365527
          total_loss: -0.07088964593907197
          vf_explained_var: -0.8531293272972107
          vf_loss: 8.268946096197598e-05
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  iterations_since_restore: 316
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,316,6558.69,316000,0,0,0,459.97


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-09_16-41-32
  done: false
  episode_len_mean: 459.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 738
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6180223517947727
          entropy_coeff: 0.009999999999999998
          kl: 0.011773700177002105
          policy_loss: -0.010470791533589364
          total_loss: -0.025952495779428215
          vf_explained_var: -1.0
          vf_loss: 6.98776940731073e-05
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
  iterations_since_restore: 317
  node_ip: 192.168.3.5
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,317,6578.65,317000,0,0,0,459.77


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-09_16-41-53
  done: false
  episode_len_mean: 460.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 740
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7328697774145339
          entropy_coeff: 0.009999999999999998
          kl: 0.012911480996882574
          policy_loss: 0.03079516070170535
          total_loss: 0.01422031716340118
          vf_explained_var: -0.6976348757743835
          vf_loss: 6.446364334098892e-05
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  iterations_since_restore: 318
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,318,6599.66,318000,0,0,0,460.25


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-09_16-42-16
  done: false
  episode_len_mean: 459.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 743
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7019898242420621
          entropy_coeff: 0.009999999999999998
          kl: 0.011437999446750346
          policy_loss: -0.06413892577919696
          total_loss: -0.08039840206296908
          vf_explained_var: 0.03042825683951378
          vf_loss: 0.00014970522477395005
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  iterations_since_restore: 319
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,319,6622.82,319000,0,0,0,459.41


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-09_16-42-35
  done: false
  episode_len_mean: 459.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 745
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8106394131978354
          entropy_coeff: 0.009999999999999998
          kl: 0.012775761868370338
          policy_loss: -0.03679121434688568
          total_loss: -0.05413456575738059
          vf_explained_var: -0.813411295413971
          vf_loss: 8.090197677827544e-05
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 320
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,320,6641.76,320000,0,0,0,459.24


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-09_16-42-59
  done: false
  episode_len_mean: 457.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 748
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.70987649096383
          entropy_coeff: 0.009999999999999998
          kl: 0.013107429979837172
          policy_loss: 0.014050489105284215
          total_loss: -0.0022569985853301156
          vf_explained_var: -0.5145699977874756
          vf_loss: 9.142638009507209e-05
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  iterations_since_restore: 321
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,321,6665.38,321000,0,0,0,457.78




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-09_16-43-37
  done: false
  episode_len_mean: 456.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 751
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7284247345394559
          entropy_coeff: 0.009999999999999998
          kl: 0.013209143916497901
          policy_loss: -0.13813441192938222
          total_loss: -0.15460560843348503
          vf_explained_var: -0.36697447299957275
          vf_loss: 0.00010776696395219511
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
  iterations_since_restore: 322
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,322,6703.87,322000,0,0,0,456.24


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-09_16-43-57
  done: false
  episode_len_mean: 456.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 753
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7019508944617376
          entropy_coeff: 0.009999999999999998
          kl: 0.015568354904261858
          policy_loss: -0.00888200087679757
          total_loss: -0.024967182344860502
          vf_explained_var: -0.8374032974243164
          vf_loss: 0.0001030801407371958
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000
  iterations_since_restore: 323
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,323,6723.22,323000,0,0,0,456.51


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-09_16-44-18
  done: false
  episode_len_mean: 456.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 755
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7718248393800524
          entropy_coeff: 0.009999999999999998
          kl: 0.011216896122065853
          policy_loss: -0.021051576361060143
          total_loss: -0.038102251001530224
          vf_explained_var: -0.010777494870126247
          vf_loss: 6.865991381346248e-05
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
  iterations_since_restore: 324
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,324,6745.01,324000,0,0,0,456.26


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-09_16-44-40
  done: false
  episode_len_mean: 455.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 758
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7727350698577033
          entropy_coeff: 0.009999999999999998
          kl: 0.011715167351462118
          policy_loss: -0.07810282574759589
          total_loss: -0.09512314059668117
          vf_explained_var: -0.8986191749572754
          vf_loss: 8.152331328245864e-05
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
  iterations_since_restore: 325
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,325,6766.16,325000,0,0,0,455.12


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-09_16-45-03
  done: false
  episode_len_mean: 453.56
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 761
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5438359220822653
          entropy_coeff: 0.009999999999999998
          kl: 0.018872071712492142
          policy_loss: -0.0520926124519772
          total_loss: -0.06640105686253972
          vf_explained_var: -0.71479332447052
          vf_loss: 0.00012226932121848222
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  iterations_since_restore: 326
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,326,6789.16,326000,0,0,0,453.56


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-09_16-45-27
  done: false
  episode_len_mean: 451.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 763
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6831847508748372
          entropy_coeff: 0.009999999999999998
          kl: 0.009383824917860275
          policy_loss: -0.022440208908584384
          total_loss: -0.0387130012942685
          vf_explained_var: 0.013840978965163231
          vf_loss: 5.801812029757356e-05
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000
  iterations_since_restore: 327
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,327,6813.14,327000,0,0,0,451.97


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-09_16-45-49
  done: false
  episode_len_mean: 450.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 766
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6334055529700384
          entropy_coeff: 0.009999999999999998
          kl: 0.009415611819106958
          policy_loss: 0.015458198967907164
          total_loss: -0.0003105983138084412
          vf_explained_var: -0.2669447064399719
          vf_loss: 6.252632467496571e-05
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  iterations_since_restore: 328
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,328,6835.56,328000,0,0,0,450.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-09_16-46-14
  done: false
  episode_len_mean: 447.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 769
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6767453948656719
          entropy_coeff: 0.009999999999999998
          kl: 0.010776647401841128
          policy_loss: -0.010698595891396205
          total_loss: -0.026834073000484043
          vf_explained_var: -0.43363139033317566
          vf_loss: 5.657218435872993e-05
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
  iterations_since_restore: 329
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,329,6860.9,329000,0,0,0,447.85


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-09_16-46-38
  done: false
  episode_len_mean: 447.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 772
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.580103537771437
          entropy_coeff: 0.009999999999999998
          kl: 0.010908979604162046
          policy_loss: -0.07820711396634579
          total_loss: -0.09336852836940024
          vf_explained_var: -0.33095476031303406
          vf_loss: 5.7152732491279794e-05
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  iterations_since_restore: 330
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,330,6884.45,330000,0,0,0,447.65


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-09_16-47-05
  done: false
  episode_len_mean: 444.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 775
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.3230177972051833
          entropy_coeff: 0.009999999999999998
          kl: 0.009524982773916798
          policy_loss: -0.051907190183798475
          total_loss: -0.06455279762546222
          vf_explained_var: -0.8375434875488281
          vf_loss: 7.599767510934423e-05
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000
  iterations_since_restore: 331
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,331,6911.3,331000,0,0,0,444.38


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-09_16-47-32
  done: false
  episode_len_mean: 442.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 778
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5451772967974344
          entropy_coeff: 0.009999999999999998
          kl: 0.008378211389676002
          policy_loss: -0.04547617998388079
          total_loss: -0.060433640744951035
          vf_explained_var: -0.24075357615947723
          vf_loss: 4.696949981735088e-05
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
  iterations_since_restore: 332
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,332,6938.28,332000,0,0,0,442.17




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-09_16-48-15
  done: false
  episode_len_mean: 439.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 781
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.5293036381403604
          entropy_coeff: 0.009999999999999998
          kl: 0.010768486440259375
          policy_loss: -0.038611617187658945
          total_loss: -0.05324660340944926
          vf_explained_var: -0.13399766385555267
          vf_loss: 8.307968265499867e-05
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  iterations_since_restore: 333
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,333,6981.65,333000,0,0,0,439.41


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-09_16-48-42
  done: false
  episode_len_mean: 432.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 785
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8011309994591607
          entropy_coeff: 0.009999999999999998
          kl: 0.01287440254576274
          policy_loss: -0.03617890212270949
          total_loss: -0.05342979903022448
          vf_explained_var: -0.17443878948688507
          vf_loss: 7.3001251217243e-05
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  iterations_since_restore: 334
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,334,7008.8,334000,0,0,0,432.97


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-09_16-49-12
  done: false
  episode_len_mean: 426.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 788
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0533935546875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7644010278913709
          entropy_coeff: 0.009999999999999998
          kl: 0.025272642036807645
          policy_loss: 0.10050454470846389
          total_loss: 0.8327462977833218
          vf_explained_var: -0.22227753698825836
          vf_loss: 0.7485363642840336
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  iterations_since_restore: 335
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,335,7038.11,335000,-0.18,0,-12,426.33


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-09_16-49-41
  done: false
  episode_len_mean: 414.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -12.0
  episodes_this_iter: 4
  episodes_total: 792
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7016897320747375
          entropy_coeff: 0.009999999999999998
          kl: 0.012589154932623003
          policy_loss: 0.0024159006774425507
          total_loss: 0.04011598461204105
          vf_explained_var: 0.6665397882461548
          vf_loss: 0.053708711887399355
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  iterations_since_restore: 336
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,336,7067.34,336000,-0.18,0,-12,414.32


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-09_16-50-05
  done: false
  episode_len_mean: 409.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 795
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7112171451250713
          entropy_coeff: 0.009999999999999998
          kl: 0.018101400954903292
          policy_loss: 0.012076890551381642
          total_loss: 0.014435683108038373
          vf_explained_var: 0.5923033952713013
          vf_loss: 0.018021220082624093
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  iterations_since_restore: 337
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,337,7091.48,337000,-0.18,0,-12,409


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-09_16-50-28
  done: false
  episode_len_mean: 402.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 798
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 1.811668544345432
          entropy_coeff: 0.009999999999999998
          kl: 0.011428134168643527
          policy_loss: 0.05664947662088606
          total_loss: 0.04234723705384466
          vf_explained_var: 0.609605073928833
          vf_loss: 0.0028991638631042506
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  iterations_since_restore: 338
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,338,7114.04,338000,-0.18,0,-12,402.45


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-09_16-50-50
  done: false
  episode_len_mean: 400.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 800
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 1.6919780810674032
          entropy_coeff: 0.009999999999999998
          kl: 0.013548782062962781
          policy_loss: -0.07610088586807251
          total_loss: -0.08827258911397722
          vf_explained_var: 0.4713405668735504
          vf_loss: 0.003662950047550516
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  iterations_since_restore: 339
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,339,7135.75,339000,-0.18,0,-12,400.76


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-09_16-51-13
  done: false
  episode_len_mean: 396.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 803
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 1.877640950679779
          entropy_coeff: 0.009999999999999998
          kl: 0.01478136614849856
          policy_loss: -0.03425285890698433
          total_loss: -0.04723798113150729
          vf_explained_var: -0.13186298310756683
          vf_loss: 0.004607442857619996
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 340
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,340,7158.85,340000,-0.18,0,-12,396.38


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-09_16-51-36
  done: false
  episode_len_mean: 390.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 806
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 1.98858503235711
          entropy_coeff: 0.009999999999999998
          kl: 0.014029443976735766
          policy_loss: 0.07664484447903103
          total_loss: 0.05845772481213014
          vf_explained_var: -0.10808488726615906
          vf_loss: 0.0005751064726306746
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
  iterations_since_restore: 341
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,341,7182.24,341000,-0.18,0,-12,390.87


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-09_16-51-59
  done: false
  episode_len_mean: 386.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 809
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 2.1070228695869444
          entropy_coeff: 0.009999999999999998
          kl: 0.009043319073911284
          policy_loss: -0.055516593960217304
          total_loss: -0.07551488234765
          vf_explained_var: 0.03521757572889328
          vf_loss: 0.00034765801441002014
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  iterations_since_restore: 342
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,342,7204.8,342000,-0.18,0,-12,386.21




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-09_16-52-41
  done: false
  episode_len_mean: 381.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 812
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 2.1309199081526864
          entropy_coeff: 0.009999999999999998
          kl: 0.015461524833132304
          policy_loss: -0.06487698952356974
          total_loss: -0.08444144460890028
          vf_explained_var: -0.3971020579338074
          vf_loss: 0.0005064258631642184
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  iterations_since_restore: 343
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,343,7247.19,343000,-0.18,0,-12,381.63


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-09_16-53-07
  done: false
  episode_len_mean: 376.12
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.17
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 815
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 2.1337700499428642
          entropy_coeff: 0.009999999999999998
          kl: 0.006028769374647207
          policy_loss: 0.07811647049254841
          total_loss: 0.05773023437294695
          vf_explained_var: -0.4006154239177704
          vf_loss: 0.0004686181510300634
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  iterations_since_restore: 344
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,344,7273.13,344000,-0.17,1,-12,376.12


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-09_16-53-33
  done: false
  episode_len_mean: 370.79
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.17
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 818
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 2.112147238519457
          entropy_coeff: 0.009999999999999998
          kl: 0.013170849344049135
          policy_loss: 0.00674780516160859
          total_loss: -0.012903203773829672
          vf_explained_var: -0.5385060906410217
          vf_loss: 0.000415606731419555
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
  iterations_since_restore: 345
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,345,7299.13,345000,-0.17,1,-12,370.79


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-09_16-53-55
  done: false
  episode_len_mean: 367.61
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.17
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 821
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08009033203125002
          cur_lr: 5.000000000000001e-05
          entropy: 1.949219905005561
          entropy_coeff: 0.009999999999999998
          kl: 0.02279236946720112
          policy_loss: -0.016482206227050888
          total_loss: -0.03355724811553955
          vf_explained_var: 0.48643842339515686
          vf_loss: 0.0005917081333690374
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  iterations_since_restore: 346
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,346,7321.5,346000,-0.17,1,-12,367.61


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-09_16-54-20
  done: false
  episode_len_mean: 363.77
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.17
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 824
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.016873694790734
          entropy_coeff: 0.009999999999999998
          kl: 0.017606238014743664
          policy_loss: -0.05361244926850001
          total_loss: -0.07130191938744651
          vf_explained_var: 0.031995102763175964
          vf_loss: 0.0003641334111711735
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  iterations_since_restore: 347
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,347,7345.92,347000,-0.17,1,-12,363.77


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-09_16-54-44
  done: false
  episode_len_mean: 358.64
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.17
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 827
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.2164789226320054
          entropy_coeff: 0.009999999999999998
          kl: 0.010196326210882962
          policy_loss: -0.07993090682559544
          total_loss: -0.10056796752744251
          vf_explained_var: -0.42219871282577515
          vf_loss: 0.00030278749893315965
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  iterations_since_restore: 348
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,348,7369.78,348000,-0.17,1,-12,358.64


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-09_16-55-07
  done: false
  episode_len_mean: 355.78
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.17
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 830
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.170193174150255
          entropy_coeff: 0.009999999999999998
          kl: 0.009337318338038338
          policy_loss: 0.017877722365988627
          total_loss: -0.002281004356013404
          vf_explained_var: 0.32238876819610596
          vf_loss: 0.00042146317868577577
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  iterations_since_restore: 349
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,349,7392.68,349000,-0.17,1,-12,355.78


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-09_16-55-31
  done: false
  episode_len_mean: 353.7
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.23
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 832
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.219187765651279
          entropy_coeff: 0.009999999999999998
          kl: 0.011465926813252123
          policy_loss: 0.03399006575345993
          total_loss: 0.24398261573579577
          vf_explained_var: -0.5526103973388672
          vf_loss: 0.23080696068807607
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  iterations_since_restore: 350
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,350,7416.61,350000,-0.23,1,-12,353.7


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-09_16-55-56
  done: false
  episode_len_mean: 350.12
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.23
  episode_reward_min: -12.0
  episodes_this_iter: 4
  episodes_total: 836
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.264414562119378
          entropy_coeff: 0.009999999999999998
          kl: 0.0104510197071071
          policy_loss: -0.08587806928488943
          total_loss: -0.09213119455509716
          vf_explained_var: 0.08557872474193573
          vf_loss: 0.015135482460674312
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  iterations_since_restore: 351
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,351,7442.41,351000,-0.23,1,-12,350.12


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-09_16-56-20
  done: false
  episode_len_mean: 348.41
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.23
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 838
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.367915725708008
          entropy_coeff: 0.009999999999999998
          kl: 0.010369736369837904
          policy_loss: -0.1086830832891994
          total_loss: -0.12187303362621202
          vf_explained_var: 0.16375568509101868
          vf_loss: 0.009243437471903032
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  iterations_since_restore: 352
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,352,7465.48,352000,-0.23,1,-12,348.41




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-09_16-57-02
  done: false
  episode_len_mean: 346.43
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.23
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 841
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.371286784278022
          entropy_coeff: 0.009999999999999998
          kl: 0.007230513274701058
          policy_loss: 0.007149384646779961
          total_loss: -0.014184323615498012
          vf_explained_var: -0.8613851070404053
          vf_loss: 0.0015105172985285106
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  iterations_since_restore: 353
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,353,7507.88,353000,-0.23,1,-12,346.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-09_16-57-29
  done: false
  episode_len_mean: 341.78
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.34
  episode_reward_min: -12.0
  episodes_this_iter: 4
  episodes_total: 845
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.106144579251607
          entropy_coeff: 0.009999999999999998
          kl: 0.017432922751909965
          policy_loss: 0.05543830378188027
          total_loss: 0.49222902539703584
          vf_explained_var: -0.49819010496139526
          vf_loss: 0.45575785381822953
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  iterations_since_restore: 354
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,354,7534.81,354000,-0.34,1,-12,341.78


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-09_16-57-58
  done: false
  episode_len_mean: 339.46
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.39
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 848
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.0075257142384846
          entropy_coeff: 0.009999999999999998
          kl: 0.018856201763833368
          policy_loss: 0.04835364123185475
          total_loss: 0.12406854298379687
          vf_explained_var: -0.37520632147789
          vf_loss: 0.09352485628074242
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
  iterations_since_restore: 355
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,355,7563.53,355000,-0.39,1,-12,339.46


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-09_16-58-23
  done: false
  episode_len_mean: 336.95
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.45
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 851
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.156254866388109
          entropy_coeff: 0.009999999999999998
          kl: 0.011582136657604833
          policy_loss: 0.009370446039570702
          total_loss: 0.2833574898954895
          vf_explained_var: -0.11706239730119705
          vf_loss: 0.2941581719710181
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
  iterations_since_restore: 356
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,356,7588.83,356000,-0.45,1,-12,336.95


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-09_16-58-48
  done: false
  episode_len_mean: 333.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.45
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 854
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.3166798750559487
          entropy_coeff: 0.009999999999999998
          kl: 0.012512173953008777
          policy_loss: -0.07892813757061959
          total_loss: -0.08458800812562307
          vf_explained_var: -0.056873735040426254
          vf_loss: 0.016003772797476914
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  iterations_since_restore: 357
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,357,7613.41,357000,-0.45,1,-12,333.89


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-09_16-59-12
  done: false
  episode_len_mean: 332.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.45
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 857
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.4174422396553887
          entropy_coeff: 0.009999999999999998
          kl: 0.005719955797088044
          policy_loss: -0.21105662484963736
          total_loss: -0.22637926273875766
          vf_explained_var: -0.3681158423423767
          vf_loss: 0.008164612979938587
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  iterations_since_restore: 358
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,358,7637.91,358000,-0.45,1,-12,332.57


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-09_16-59-37
  done: false
  episode_len_mean: 330.97
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.52
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 860
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.33583136399587
          entropy_coeff: 0.009999999999999998
          kl: 0.015504148770614387
          policy_loss: 0.003234295795361201
          total_loss: -0.014536575890249676
          vf_explained_var: -0.6733130812644958
          vf_loss: 0.0037248429894033404
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
  iterations_since_restore: 359
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,359,7663.1,359000,-0.52,1,-12,330.97


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-09_17-00-03
  done: false
  episode_len_mean: 329.12
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.52
  episode_reward_min: -12.0
  episodes_this_iter: 4
  episodes_total: 864
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.400808933046129
          entropy_coeff: 0.009999999999999998
          kl: 0.017224519737464028
          policy_loss: -0.06270839700268374
          total_loss: -0.08172897129423089
          vf_explained_var: -0.6589635014533997
          vf_loss: 0.0029182407966194053
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 360
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,360,7688.42,360000,-0.52,1,-12,329.12


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-09_17-00-25
  done: false
  episode_len_mean: 329.34
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.52
  episode_reward_min: -12.0
  episodes_this_iter: 2
  episodes_total: 866
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.470906374189589
          entropy_coeff: 0.009999999999999998
          kl: 0.009787612186919346
          policy_loss: -0.0233644704023997
          total_loss: -0.04536548571454154
          vf_explained_var: -0.6284942030906677
          vf_loss: 0.0015322098010478334
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  iterations_since_restore: 361
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,361,7710.79,361000,-0.52,1,-12,329.34


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-09_17-00-49
  done: false
  episode_len_mean: 329.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.52
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 869
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.3447822252909343
          entropy_coeff: 0.009999999999999998
          kl: 0.010131820315761628
          policy_loss: 0.05242419160074658
          total_loss: 0.031653200172715716
          vf_explained_var: -0.6284095644950867
          vf_loss: 0.0014596394188831456
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  iterations_since_restore: 362
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,362,7734.95,362000,-0.52,1,-12,329.07




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-09_17-01-32
  done: false
  episode_len_mean: 327.63
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.52
  episode_reward_min: -12.0
  episodes_this_iter: 4
  episodes_total: 873
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.231315522723728
          entropy_coeff: 0.009999999999999998
          kl: 0.010456445062714339
          policy_loss: 0.05564342161847485
          total_loss: 0.03621201432413525
          vf_explained_var: 0.5021857619285583
          vf_loss: 0.0016255564793633919
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  iterations_since_restore: 363
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,363,7777.36,363000,-0.52,1,-12,327.63


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-09_17-01-57
  done: false
  episode_len_mean: 328.04
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.52
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 876
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.3257033639483984
          entropy_coeff: 0.009999999999999998
          kl: 0.010921695162028507
          policy_loss: -0.013536716004212697
          total_loss: -0.034432058326072164
          vf_explained_var: -0.2795926332473755
          vf_loss: 0.0010496055480972346
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  iterations_since_restore: 364
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,364,7802.68,364000,-0.52,1,-12,328.04


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-09_17-02-21
  done: false
  episode_len_mean: 328.58
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.52
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 879
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.411293927828471
          entropy_coeff: 0.009999999999999998
          kl: 0.009830558032819603
          policy_loss: -0.10048558964497513
          total_loss: -0.1230775727579991
          vf_explained_var: -0.3958461880683899
          vf_loss: 0.0003399567081942223
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  iterations_since_restore: 365
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,365,7826.52,365000,-0.52,1,-12,328.58


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-09_17-02-48
  done: false
  episode_len_mean: 328.26
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.52
  episode_reward_min: -12.0
  episodes_this_iter: 3
  episodes_total: 882
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.1632990333769055
          entropy_coeff: 0.009999999999999998
          kl: 0.011023844264875156
          policy_loss: -0.043244702203406225
          total_loss: -0.06191612026757664
          vf_explained_var: 0.2949404716491699
          vf_loss: 0.0016372148102770248
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  iterations_since_restore: 366
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,366,7854.1,366000,-0.52,1,-12,328.26


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-09_17-03-17
  done: false
  episode_len_mean: 328.15
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.46
  episode_reward_min: -12.0
  episodes_this_iter: 4
  episodes_total: 886
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.0097436984380086
          entropy_coeff: 0.009999999999999998
          kl: 0.014781128824406133
          policy_loss: -0.043261899633540046
          total_loss: -0.06052739686436123
          vf_explained_var: 0.4619762897491455
          vf_loss: 0.0010562014449129087
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
  iterations_since_restore: 367
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,367,7882.67,367000,-0.46,1,-12,328.15


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-09_17-03-44
  done: false
  episode_len_mean: 328.23
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.34
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 889
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.1215440299775867
          entropy_coeff: 0.009999999999999998
          kl: 0.019701953964460808
          policy_loss: -0.11158579281634755
          total_loss: -0.12979162865214877
          vf_explained_var: 0.6100659966468811
          vf_loss: 0.0006426991034661317
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  iterations_since_restore: 368
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,368,7909.97,368000,-0.34,1,-11,328.23


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-09_17-04-13
  done: false
  episode_len_mean: 328.87
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.34
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 892
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.021900251176622
          entropy_coeff: 0.009999999999999998
          kl: 0.01149704787602715
          policy_loss: -0.1530060422089365
          total_loss: -0.17071684416797425
          vf_explained_var: -0.25890544056892395
          vf_loss: 0.0011269952759094951
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  iterations_since_restore: 369
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,369,7938.31,369000,-0.34,1,-11,328.87


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-09_17-04-39
  done: false
  episode_len_mean: 327.8
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.34
  episode_reward_min: -11.0
  episodes_this_iter: 4
  episodes_total: 896
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.0370572606722512
          entropy_coeff: 0.009999999999999998
          kl: 0.009515668790115188
          policy_loss: -0.0956707792977492
          total_loss: -0.11435766203535927
          vf_explained_var: 0.41102054715156555
          vf_loss: 0.0005405202799011021
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  iterations_since_restore: 370
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,370,7964.18,370000,-0.34,1,-11,327.8


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-09_17-05-05
  done: false
  episode_len_mean: 324.52
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.34
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 899
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.025974537266625
          entropy_coeff: 0.009999999999999998
          kl: 0.010626835028291303
          policy_loss: 0.03172116480353806
          total_loss: 0.01327938193248378
          vf_explained_var: -0.8166541457176208
          vf_loss: 0.0005413029996109091
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000
  iterations_since_restore: 371
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,371,7990.79,371000,-0.34,1,-11,324.52




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-09_17-05-50
  done: false
  episode_len_mean: 323.96
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.34
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 902
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.063798577255673
          entropy_coeff: 0.009999999999999998
          kl: 0.01341756448492717
          policy_loss: -0.026330521785550648
          total_loss: -0.04477609027591017
          vf_explained_var: -0.6461657285690308
          vf_loss: 0.0005804863195711126
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  iterations_since_restore: 372
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,372,8035.44,372000,-0.34,1,-11,323.96


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-09_17-06-17
  done: false
  episode_len_mean: 320.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.34
  episode_reward_min: -11.0
  episodes_this_iter: 4
  episodes_total: 906
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9054103546672396
          entropy_coeff: 0.009999999999999998
          kl: 0.008679015301724767
          policy_loss: -0.050015993830230504
          total_loss: -0.06762494535909759
          vf_explained_var: 0.3361447751522064
          vf_loss: 0.0004024967829334653
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
  iterations_since_restore: 373
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,373,8062.98,373000,-0.34,1,-11,320.57


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-09_17-06-45
  done: false
  episode_len_mean: 318.15
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.34
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 909
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9680428981781006
          entropy_coeff: 0.009999999999999998
          kl: 0.01701467185320548
          policy_loss: 0.0396829593512747
          total_loss: 0.022561144083738327
          vf_explained_var: -0.08580709248781204
          vf_loss: 0.0005145483575082229
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  iterations_since_restore: 374
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,374,8090.42,374000,-0.34,1,-11,318.15


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-09_17-07-12
  done: false
  episode_len_mean: 317.19
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.28
  episode_reward_min: -11.0
  episodes_this_iter: 4
  episodes_total: 913
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9117276867230732
          entropy_coeff: 0.009999999999999998
          kl: 0.006812812576606022
          policy_loss: 0.2098203119304445
          total_loss: 0.19200426323546305
          vf_explained_var: 0.1788189709186554
          vf_loss: 0.0004827668421461971
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  iterations_since_restore: 375
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,375,8117.31,375000,-0.28,6,-11,317.19


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-09_17-07-38
  done: false
  episode_len_mean: 316.89
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.29
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 916
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.1361022684309217
          entropy_coeff: 0.009999999999999998
          kl: 0.010974380372148455
          policy_loss: 0.007145387265417311
          total_loss: -0.012673980370163918
          vf_explained_var: -0.618773877620697
          vf_loss: 0.0002232428957035558
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  iterations_since_restore: 376
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,376,8143.25,376000,-0.29,6,-11,316.89


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-09_17-08-05
  done: false
  episode_len_mean: 316.53
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.29
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 919
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.166238522529602
          entropy_coeff: 0.009999999999999998
          kl: 0.013245123389553252
          policy_loss: -0.0864840779453516
          total_loss: -0.10635975342657832
          vf_explained_var: -0.24278460443019867
          vf_loss: 0.00019550024169600672
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
  iterations_since_restore: 377
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,377,8170.1,377000,-0.29,6,-11,316.53


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-09_17-08-30
  done: false
  episode_len_mean: 314.94
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.29
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 922
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.128589733441671
          entropy_coeff: 0.009999999999999998
          kl: 0.007363524578794964
          policy_loss: -0.017702442821529177
          total_loss: -0.037753161953555214
          vf_explained_var: -0.11111903935670853
          vf_loss: 0.00035055714292361195
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  iterations_since_restore: 378


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,378,8195.11,378000,-0.29,6,-11,314.94


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-09_17-08-55
  done: false
  episode_len_mean: 315.18
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.29
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 925
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.1248839484320747
          entropy_coeff: 0.009999999999999998
          kl: 0.007550307494121
          policy_loss: -0.07740944145868221
          total_loss: -0.09764894958999422
          vf_explained_var: -0.41890114545822144
          vf_loss: 0.00010227296111730135
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  iterations_since_restore: 379
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,379,8220.29,379000,-0.29,6,-11,315.18


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-09_17-09-21
  done: false
  episode_len_mean: 312.74
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.29
  episode_reward_min: -11.0
  episodes_this_iter: 4
  episodes_total: 929
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.1156390137142607
          entropy_coeff: 0.009999999999999998
          kl: 0.010268620792903506
          policy_loss: -0.015085064247250557
          total_loss: -0.034810280013415545
          vf_explained_var: -0.9757000207901001
          vf_loss: 0.00019754506057425815
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  iterations_since_restore: 380


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,380,8246,380000,-0.29,6,-11,312.74




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-09_17-10-05
  done: false
  episode_len_mean: 311.99
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.23
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 932
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.1258209572898017
          entropy_coeff: 0.009999999999999998
          kl: 0.0099690631609939
          policy_loss: -0.04517589037617047
          total_loss: -0.0651603733499845
          vf_explained_var: 0.08702895045280457
          vf_loss: 7.608603314616226e-05
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  iterations_since_restore: 381
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,381,8290.75,381000,-0.23,6,-11,311.99


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-09_17-10-31
  done: false
  episode_len_mean: 310.85
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.23
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 935
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.2204616652594673
          entropy_coeff: 0.009999999999999998
          kl: 0.014079195343365372
          policy_loss: -0.08204346340563562
          total_loss: -0.10234046855734455
          vf_explained_var: -0.6844297647476196
          vf_loss: 0.00021619777997127838
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  iterations_since_restore: 382
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,382,8316.44,382000,-0.23,6,-11,310.85


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-09_17-10-57
  done: false
  episode_len_mean: 309.61
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.23
  episode_reward_min: -11.0
  episodes_this_iter: 4
  episodes_total: 939
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.2241334438323976
          entropy_coeff: 0.009999999999999998
          kl: 0.007065707296546976
          policy_loss: 0.01842221054765913
          total_loss: -0.0028479055398040348
          vf_explained_var: -0.690258264541626
          vf_loss: 0.000122376170192082
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  iterations_since_restore: 383
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,383,8342.37,383000,-0.23,6,-11,309.61


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-09_17-11-21
  done: false
  episode_len_mean: 309.67
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.23
  episode_reward_min: -11.0
  episodes_this_iter: 3
  episodes_total: 942
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.257630043559604
          entropy_coeff: 0.009999999999999998
          kl: 0.00792232456974236
          policy_loss: -0.11669892143044207
          total_loss: -0.1381396096613672
          vf_explained_var: -0.8424361348152161
          vf_loss: 0.0001838576880497082
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
  iterations_since_restore: 384
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,384,8366.72,384000,-0.23,6,-11,309.67


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-09_17-11-46
  done: false
  episode_len_mean: 310.5
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.12
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 945
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.228438032997979
          entropy_coeff: 0.009999999999999998
          kl: 0.008044995213893813
          policy_loss: 0.012300935718748305
          total_loss: -0.008858174540930325
          vf_explained_var: -0.9930218458175659
          vf_loss: 0.00015878070456286272
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  iterations_since_restore: 385
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,385,8391.56,385000,-0.12,6,-7,310.5


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-09_17-12-11
  done: false
  episode_len_mean: 311.46
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.07
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 948
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.1910951084560817
          entropy_coeff: 0.009999999999999998
          kl: 0.0073981004442610085
          policy_loss: -0.07567330582274331
          total_loss: -0.09656660142872069
          vf_explained_var: 0.2862665057182312
          vf_loss: 0.00012887839523803752
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  iterations_since_restore: 386
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,386,8416.38,386000,-0.07,6,-7,311.46


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-09_17-12-37
  done: false
  episode_len_mean: 311.58
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.01
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 951
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.921828677919176
          entropy_coeff: 0.009999999999999998
          kl: 0.010015775566976654
          policy_loss: -0.14672990093628566
          total_loss: -0.16458703577518463
          vf_explained_var: -0.3426990211009979
          vf_loss: 0.00015790278149425933
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  iterations_since_restore: 387
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,387,8442.63,387000,-0.01,6,-7,311.58


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-09_17-13-03
  done: false
  episode_len_mean: 310.7
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.01
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 954
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.002597671084934
          entropy_coeff: 0.009999999999999998
          kl: 0.008409671955654948
          policy_loss: -0.05860916500290235
          total_loss: -0.0774202358805471
          vf_explained_var: 0.10836570709943771
          vf_loss: 0.00020460862255681098
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  iterations_since_restore: 388
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,388,8468.4,388000,-0.01,6,-7,310.7


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-09_17-13-28
  done: false
  episode_len_mean: 309.96
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.01
  episode_reward_min: -7.0
  episodes_this_iter: 3
  episodes_total: 957
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.0251202079984876
          entropy_coeff: 0.009999999999999998
          kl: 0.009496897480229076
          policy_loss: -0.05359183210465643
          total_loss: -0.07258268495400746
          vf_explained_var: -0.856799840927124
          vf_loss: 0.00011943364556322598
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  iterations_since_restore: 389
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,389,8493.54,389000,-0.01,6,-7,309.96




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-09_17-14-14
  done: false
  episode_len_mean: 308.51
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 961
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.0430133395724828
          entropy_coeff: 0.009999999999999998
          kl: 0.013501323865921932
          policy_loss: -0.0585606956647502
          total_loss: -0.07721186917689112
          vf_explained_var: -0.7376992702484131
          vf_loss: 0.00015697168508065968
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  iterations_since_restore: 390
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,390,8538.92,390000,0.06,6,0,308.51


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-09_17-14-39
  done: false
  episode_len_mean: 308.4
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 964
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.097333867020077
          entropy_coeff: 0.009999999999999998
          kl: 0.017570537983291366
          policy_loss: -0.10734670749968953
          total_loss: -0.12608582948644956
          vf_explained_var: -0.4656691551208496
          vf_loss: 0.00012337003164349073
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  iterations_since_restore: 391
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,391,8563.72,391000,0.06,6,0,308.4


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-09_17-15-01
  done: false
  episode_len_mean: 307.84
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 967
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.1024550716082255
          entropy_coeff: 0.009999999999999998
          kl: 0.009933960353453999
          policy_loss: -0.08697397079732683
          total_loss: -0.10665470759073893
          vf_explained_var: 0.09174838662147522
          vf_loss: 0.0001503890830564261
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  iterations_since_restore: 392
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,392,8586.49,392000,0.06,6,0,307.84


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-09_17-15-25
  done: false
  episode_len_mean: 309.14
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 970
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.0202060990863377
          entropy_coeff: 0.009999999999999998
          kl: 0.012813959116762617
          policy_loss: -0.10571975509325664
          total_loss: -0.12387492871946759
          vf_explained_var: -0.3596545457839966
          vf_loss: 0.0005074790711255951
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  iterations_since_restore: 393
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,393,8609.96,393000,0.06,6,0,309.14


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-09_17-15-49
  done: false
  episode_len_mean: 310.15
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 973
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.035703115993076
          entropy_coeff: 0.009999999999999998
          kl: 0.007227934708715426
          policy_loss: 0.014818825489944882
          total_loss: -0.0044044381628433864
          vf_explained_var: -0.3266737163066864
          vf_loss: 0.00026543715050340524
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  iterations_since_restore: 394
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,394,8633.93,394000,0.06,6,0,310.15


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-09_17-16-13
  done: false
  episode_len_mean: 310.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 975
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.8835302617814806
          entropy_coeff: 0.009999999999999998
          kl: 0.012246329533769688
          policy_loss: 0.008162193414237764
          total_loss: -0.008859807501236598
          vf_explained_var: -0.4837474822998047
          vf_loss: 0.00034208562477336575
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  iterations_since_restore: 395
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,395,8658.41,395000,0.06,6,0,310.78


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-09_17-16-35
  done: false
  episode_len_mean: 311.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 978
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.872957968711853
          entropy_coeff: 0.009999999999999998
          kl: 0.012904271387802287
          policy_loss: -0.06543059065524075
          total_loss: -0.08222460194180409
          vf_explained_var: 0.5892431139945984
          vf_loss: 0.00038530724171626695
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  iterations_since_restore: 396
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,396,8680.36,396000,0.06,6,0,311.35


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-09_17-16-55
  done: false
  episode_len_mean: 313.93
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 980
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.775383636686537
          entropy_coeff: 0.009999999999999998
          kl: 0.0069795416810568035
          policy_loss: -0.035234041263659796
          total_loss: -0.05171392286817233
          vf_explained_var: 0.27871787548065186
          vf_loss: 0.00043546042773717395
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  iterations_since_restore: 397
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,397,8699.92,397000,0.06,6,0,313.93


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-09_17-17-21
  done: false
  episode_len_mean: 315.91
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 984
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9352126055293613
          entropy_coeff: 0.009999999999999998
          kl: 0.008372077054433148
          policy_loss: 0.12495279966129197
          total_loss: 0.10683369342651632
          vf_explained_var: -0.06763919442892075
          vf_loss: 0.00022723493562403342
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
  iterations_since_restore: 398
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,398,8726.11,398000,0.06,6,0,315.91


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-09_17-17-47
  done: false
  episode_len_mean: 317.3
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 986
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9720727099312676
          entropy_coeff: 0.009999999999999998
          kl: 0.011462793074324571
          policy_loss: -0.09986008521583345
          total_loss: -0.11780493093861474
          vf_explained_var: -0.11783729493618011
          vf_loss: 0.00039879657593297046
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  iterations_since_restore: 399
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,399,8751.93,399000,0.06,6,0,317.3




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-09_17-18-32
  done: false
  episode_len_mean: 317.75
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 990
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9729576680395338
          entropy_coeff: 0.009999999999999998
          kl: 0.010351621865153095
          policy_loss: -0.030480165200101005
          total_loss: -0.04875798912511931
          vf_explained_var: -0.30145546793937683
          vf_loss: 0.00020815568839477945
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  iterations_since_restore: 400
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,400,8796.92,400000,0.06,6,0,317.75


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-09_17-19-05
  done: false
  episode_len_mean: 317.58
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 993
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9278349121411642
          entropy_coeff: 0.009999999999999998
          kl: 0.008873446982060232
          policy_loss: 0.02065134271979332
          total_loss: 0.0026256128317779966
          vf_explained_var: -0.7886888980865479
          vf_loss: 0.00018660413588804657
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  iterations_since_restore: 401
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,401,8829.76,401000,0.06,6,0,317.58


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-09_17-19-33
  done: false
  episode_len_mean: 316.95
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 997
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.004711351129744
          entropy_coeff: 0.009999999999999998
          kl: 0.008134717342667842
          policy_loss: -0.12052340753790405
          total_loss: -0.1395253432707654
          vf_explained_var: -0.4215483069419861
          vf_loss: 6.79121562118073e-05
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  iterations_since_restore: 402
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,402,8858.1,402000,0.06,6,0,316.95


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-09_17-20-02
  done: false
  episode_len_mean: 317.13
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1000
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.983719571431478
          entropy_coeff: 0.009999999999999998
          kl: 0.009378538903117257
          policy_loss: 0.06399224764770932
          total_loss: 0.04556356113817957
          vf_explained_var: 0.20882640779018402
          vf_loss: 0.000281814331295512
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iterations_since_restore: 403
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,403,8887.02,403000,0.06,6,0,317.13


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-09_17-20-31
  done: false
  episode_len_mean: 316.06
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1004
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.8372895426220364
          entropy_coeff: 0.009999999999999998
          kl: 0.006024360131510578
          policy_loss: -0.06538885003990597
          total_loss: -0.08285989165306092
          vf_explained_var: -0.3738316297531128
          vf_loss: 0.00017811043977013064
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iterations_since_restore: 404
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,404,8915.56,404000,0.06,6,0,316.06


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-09_17-20-56
  done: false
  episode_len_mean: 317.22
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1007
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9988259487681919
          entropy_coeff: 0.009999999999999998
          kl: 0.019951989755858553
          policy_loss: -0.11935066613886092
          total_loss: -0.13672556802630426
          vf_explained_var: 0.4331044554710388
          vf_loss: 0.00021641575119954522
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iterations_since_restore: 405
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,405,8940.39,405000,0.06,6,0,317.22


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-09_17-21-20
  done: false
  episode_len_mean: 318.1
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1010
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.8374237484402127
          entropy_coeff: 0.009999999999999998
          kl: 0.015346449547311201
          policy_loss: -0.021125292426182163
          total_loss: -0.03723814537127813
          vf_explained_var: 0.14402922987937927
          vf_loss: 0.00041773476299972065
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  iterations_since_restore: 406
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,406,8964.93,406000,0.06,6,0,318.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-09_17-21-44
  done: false
  episode_len_mean: 319.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1013
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.8576217161284552
          entropy_coeff: 0.009999999999999998
          kl: 0.010813931135963727
          policy_loss: 0.053761702651778855
          total_loss: 0.03684975720114178
          vf_explained_var: 0.2911844551563263
          vf_loss: 0.0003651358112822183
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  iterations_since_restore: 407
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,407,8988.64,407000,0,0,0,319.2


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-09_17-22-09
  done: false
  episode_len_mean: 319.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1016
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.6790370795461866
          entropy_coeff: 0.009999999999999998
          kl: 0.0093915675631264
          policy_loss: -0.004376292477051417
          total_loss: -0.01984717758993308
          vf_explained_var: -0.10282328724861145
          vf_loss: 0.0001912268921538877
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  iterations_since_restore: 408
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,408,9014.27,408000,0,0,0,319.51


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-09_17-22-33
  done: false
  episode_len_mean: 320.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1019
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9449763827853732
          entropy_coeff: 0.009999999999999998
          kl: 0.011152641130288762
          policy_loss: 0.05150345385902458
          total_loss: 0.03357336214847035
          vf_explained_var: -0.09217643737792969
          vf_loss: 0.00017984621566332257
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iterations_since_restore: 409
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,409,9037.4,409000,0,0,0,320.78




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-09_17-23-15
  done: false
  episode_len_mean: 321.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1022
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.8808266666200426
          entropy_coeff: 0.009999999999999998
          kl: 0.012272188888362498
          policy_loss: 0.027015956656800375
          total_loss: 0.009955768328573969
          vf_explained_var: -0.3216725289821625
          vf_loss: 0.0002737509461844133
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  iterations_since_restore: 410
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,410,9079.26,410000,0,0,0,321.59


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-09_17-23-40
  done: false
  episode_len_mean: 321.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1025
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9196832007831997
          entropy_coeff: 0.009999999999999998
          kl: 0.010918479365462887
          policy_loss: -0.04894003276195791
          total_loss: -0.0665223045895497
          vf_explained_var: -0.2778627276420593
          vf_loss: 0.0003028637652783396
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  iterations_since_restore: 411
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,411,9105.07,411000,0,0,0,321.48


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-09_17-24-06
  done: false
  episode_len_mean: 322.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1028
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.8994915154245164
          entropy_coeff: 0.009999999999999998
          kl: 0.009084116193190954
          policy_loss: 0.01412793609003226
          total_loss: -0.003606014657351706
          vf_explained_var: 0.06755346059799194
          vf_loss: 0.0001696390947068317
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  iterations_since_restore: 412
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,412,9130.53,412000,0,0,0,322.57


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-09_17-24-35
  done: false
  episode_len_mean: 321.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1031
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.6558554967244465
          entropy_coeff: 0.009999999999999998
          kl: 0.007457224829136856
          policy_loss: -0.0445638808939192
          total_loss: -0.05997591101460987
          vf_explained_var: -0.5324363708496094
          vf_loss: 0.000250648963265121
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  iterations_since_restore: 413
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,413,9160.04,413000,0,0,0,321.89


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-09_17-25-04
  done: false
  episode_len_mean: 321.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1035
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.7536212669478521
          entropy_coeff: 0.009999999999999998
          kl: 0.012963082082759354
          policy_loss: 0.09425199247068829
          total_loss: 0.0784575922621621
          vf_explained_var: -0.3991397023200989
          vf_loss: 0.00018448572655971576
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  iterations_since_restore: 414
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,414,9188.26,414000,0,0,0,321.22


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-09_17-25-34
  done: false
  episode_len_mean: 319.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1038
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.3232086287604439
          entropy_coeff: 0.009999999999999998
          kl: 0.012138882654915535
          policy_loss: -0.13348948479526573
          total_loss: -0.14504865172008674
          vf_explained_var: -0.009103468619287014
          vf_loss: 0.0002146116455454224
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
  iterations_since_restore: 415
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,415,9218.46,415000,0,0,0,319.94


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-09_17-26-01
  done: false
  episode_len_mean: 319.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1042
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.3672072019841937
          entropy_coeff: 0.009999999999999998
          kl: 0.011956870846856779
          policy_loss: -0.06523793136907949
          total_loss: -0.07729737704826725
          vf_explained_var: 0.08776421844959259
          vf_loss: 0.00017617986777622717
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  iterations_since_restore: 416
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,416,9245.84,416000,0,0,0,319.39


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-09_17-26-26
  done: false
  episode_len_mean: 320.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1045
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9395441757308112
          entropy_coeff: 0.009999999999999998
          kl: 0.011473893984335445
          policy_loss: 0.018163981568068265
          total_loss: 0.00030032813342081177
          vf_explained_var: -0.4937950670719147
          vf_loss: 0.000153366953681042
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
  iterations_since_restore: 417
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,417,9270.5,417000,0,0,0,320.16


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-09_17-26-52
  done: false
  episode_len_mean: 320.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1048
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.7221060660150316
          entropy_coeff: 0.009999999999999998
          kl: 0.012162809673355814
          policy_loss: 0.030989650471342934
          total_loss: 0.015383781161573198
          vf_explained_var: -0.1660178005695343
          vf_loss: 0.00015400672976587277
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
  iterations_since_restore: 418
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,418,9296.19,418000,0,0,0,320.18




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-09_17-27-35
  done: false
  episode_len_mean: 320.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1051
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.7313152617878385
          entropy_coeff: 0.009999999999999998
          kl: 0.0112577806377335
          policy_loss: -0.052520890865061017
          total_loss: -0.06836691324909529
          vf_explained_var: -0.026687515899538994
          vf_loss: 0.00011467112106199946
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  iterations_since_restore: 419
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,419,9339.75,419000,0,0,0,320.47


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-09_17-27-59
  done: false
  episode_len_mean: 321.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1054
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.72621015575197
          entropy_coeff: 0.009999999999999998
          kl: 0.011273039045416924
          policy_loss: -0.08868344616558817
          total_loss: -0.10445359986689355
          vf_explained_var: 0.27548646926879883
          vf_loss: 0.00013765417323965165
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
  iterations_since_restore: 420
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,420,9363.16,420000,0,0,0,321.73


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-09_17-28-23
  done: false
  episode_len_mean: 322.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1057
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.7390697028901843
          entropy_coeff: 0.009999999999999998
          kl: 0.013351154066221671
          policy_loss: -0.07130924368070232
          total_loss: -0.08691547811031342
          vf_explained_var: -0.5341047644615173
          vf_loss: 0.0001805135349665458
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
  iterations_since_restore: 421
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,421,9388.04,421000,0,0,0,322.48


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-09_17-28-45
  done: false
  episode_len_mean: 324.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1059
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.0755680243174237
          entropy_coeff: 0.009999999999999998
          kl: 0.0113049023634137
          policy_loss: -0.07794397957623005
          total_loss: -0.09723817201124298
          vf_explained_var: -0.323440819978714
          vf_loss: 0.00010336801719353792
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
  iterations_since_restore: 422
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,422,9409.63,422000,0,0,0,324.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-09_17-29-09
  done: false
  episode_len_mean: 326.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1062
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.877926692697737
          entropy_coeff: 0.009999999999999998
          kl: 0.009127831374036147
          policy_loss: -0.0024872659809059565
          total_loss: -0.02004565865629249
          vf_explained_var: -0.47139930725097656
          vf_loss: 0.00012429842972778716
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  iterations_since_restore: 423
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,423,9433.45,423000,0,0,0,326.57


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-09_17-29-34
  done: false
  episode_len_mean: 327.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1065
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.8522854897711012
          entropy_coeff: 0.009999999999999998
          kl: 0.016368735875658933
          policy_loss: -0.02100357694758309
          total_loss: -0.03740169778466225
          vf_explained_var: -0.4233207702636719
          vf_loss: 0.0001582645229063928
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  iterations_since_restore: 424
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,424,9458.11,424000,0,0,0,327.42


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-09_17-29-58
  done: false
  episode_len_mean: 327.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1068
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.961084074444241
          entropy_coeff: 0.009999999999999998
          kl: 0.010874312673906792
          policy_loss: 0.007674745677245988
          total_loss: -0.010434757421414058
          vf_explained_var: -0.90251624584198
          vf_loss: 0.00019494925032227507
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  iterations_since_restore: 425
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,425,9482.17,425000,0,0,0,327.5


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-09_17-30-20
  done: false
  episode_len_mean: 328.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1070
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9120444258054097
          entropy_coeff: 0.009999999999999998
          kl: 0.012520624950944952
          policy_loss: -0.029634001602729162
          total_loss: -0.047132004300753275
          vf_explained_var: -0.8067418336868286
          vf_loss: 0.00011827107801865269
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000
  iterations_since_restore: 426
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,426,9504,426000,0,0,0,328.7


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-09_17-30-42
  done: false
  episode_len_mean: 329.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1073
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.7586801621649
          entropy_coeff: 0.009999999999999998
          kl: 0.00947329527549404
          policy_loss: -0.09727907793389426
          total_loss: -0.11362743568089273
          vf_explained_var: -0.5616359114646912
          vf_loss: 0.00010036434266819722
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
  iterations_since_restore: 427
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,427,9525.97,427000,0,0,0,329.59


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-09_17-31-06
  done: false
  episode_len_mean: 329.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1075
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.8879193319214715
          entropy_coeff: 0.009999999999999998
          kl: 0.012765341811073503
          policy_loss: -0.011179713987641865
          total_loss: -0.028426031147440276
          vf_explained_var: -0.7416925430297852
          vf_loss: 9.930669022853382e-05
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  iterations_since_restore: 428
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,428,9549.95,428000,0,0,0,329.89


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-09_17-31-31
  done: false
  episode_len_mean: 329.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1078
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.6431107838948569
          entropy_coeff: 0.009999999999999998
          kl: 0.0060371226490985915
          policy_loss: 0.04568743082798189
          total_loss: 0.03004958958675464
          vf_explained_var: 0.01893872208893299
          vf_loss: 6.799399527355693e-05
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
  iterations_since_restore: 429
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,429,9575.14,429000,0,0,0,329.1




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-09_17-32-13
  done: false
  episode_len_mean: 326.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1081
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.7844889190461901
          entropy_coeff: 0.009999999999999998
          kl: 0.01308432463669003
          policy_loss: -0.019457774857680004
          total_loss: -0.035607517427868315
          vf_explained_var: -0.6856301426887512
          vf_loss: 0.00012325477008643147
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000
  iterations_since_restore: 430
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,430,9617.6,430000,0,0,0,326.64


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-09_17-32-37
  done: false
  episode_len_mean: 328.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1084
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.080077902475993
          entropy_coeff: 0.009999999999999998
          kl: 0.015304047717427308
          policy_loss: 0.04324005179935032
          total_loss: 0.024372961868842444
          vf_explained_var: -0.6761136054992676
          vf_loss: 9.513110227190837e-05
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000
  iterations_since_restore: 431
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,431,9641.74,431000,0,0,0,328.13


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-09_17-33-00
  done: false
  episode_len_mean: 328.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1087
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.087642214033339
          entropy_coeff: 0.009999999999999998
          kl: 0.012427892326926907
          policy_loss: -0.06283024731609556
          total_loss: -0.08205965815318955
          vf_explained_var: -0.8817446231842041
          vf_loss: 0.00015397930409461778
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
  iterations_since_restore: 432
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,432,9664.4,432000,0,0,0,328.76


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-09_17-33-22
  done: false
  episode_len_mean: 330.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1089
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.0724970632129245
          entropy_coeff: 0.009999999999999998
          kl: 0.014555495974418273
          policy_loss: -0.002351941085524029
          total_loss: -0.021232514538698725
          vf_explained_var: 0.14586395025253296
          vf_loss: 9.576364399334933e-05
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000
  iterations_since_restore: 433
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,433,9685.9,433000,0,0,0,330.54


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-09_17-33-46
  done: false
  episode_len_mean: 333.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1092
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9403729293081495
          entropy_coeff: 0.009999999999999998
          kl: 0.00786189680517116
          policy_loss: -0.06701412689354684
          total_loss: -0.08540114234719011
          vf_explained_var: -1.0
          vf_loss: 7.221872652331108e-05
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  iterations_since_restore: 434
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,434,9710.48,434000,0,0,0,333.4


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-09_17-34-12
  done: false
  episode_len_mean: 335.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1095
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9431594755914476
          entropy_coeff: 0.009999999999999998
          kl: 0.014918011973589135
          policy_loss: -0.1486215522719754
          total_loss: -0.16612167035539946
          vf_explained_var: 0.1836821436882019
          vf_loss: 0.00013929296890435378
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
  iterations_since_restore: 435
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,435,9735.84,435000,0,0,0,335.55


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-09_17-34-37
  done: false
  episode_len_mean: 337.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1098
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.818774355782403
          entropy_coeff: 0.009999999999999998
          kl: 0.010075504500856555
          policy_loss: -0.02986517691363891
          total_loss: -0.046718779330452286
          vf_explained_var: -0.7297544479370117
          vf_loss: 0.00012371768397214408
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  iterations_since_restore: 436
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,436,9760.84,436000,0,0,0,337.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-09_17-35-00
  done: false
  episode_len_mean: 339.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1100
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9600948346985712
          entropy_coeff: 0.009999999999999998
          kl: 0.010739857905406009
          policy_loss: 0.05109250458578269
          total_loss: 0.03286981561945544
          vf_explained_var: -0.22597351670265198
          vf_loss: 8.802093923602823e-05
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  iterations_since_restore: 437
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,437,9784.51,437000,0,0,0,339.13


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-09_17-35-26
  done: false
  episode_len_mean: 340.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1103
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.7655558890766567
          entropy_coeff: 0.009999999999999998
          kl: 0.01208471954037312
          policy_loss: -0.03882612693640921
          total_loss: -0.05491331244508425
          vf_explained_var: -0.3221602141857147
          vf_loss: 0.00011657199413295732
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000
  iterations_since_restore: 438
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,438,9809.81,438000,0,0,0,340.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-09_17-35-47
  done: false
  episode_len_mean: 344.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1106
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9226986289024353
          entropy_coeff: 0.009999999999999998
          kl: 0.013810877106388158
          policy_loss: -0.07037132655580838
          total_loss: -0.0878308327972061
          vf_explained_var: -0.29172050952911377
          vf_loss: 0.0001083011923684454
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  iterations_since_restore: 439
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,439,9831.25,439000,0,0,0,344.31


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-09_17-36-08
  done: false
  episode_len_mean: 345.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1108
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.0219956755638124
          entropy_coeff: 0.009999999999999998
          kl: 0.01520885275581475
          policy_loss: -0.05730314221647051
          total_loss: -0.07563262399699953
          vf_explained_var: -0.582245409488678
          vf_loss: 6.334861787359437e-05
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  iterations_since_restore: 440
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,440,9852.11,440000,0,0,0,345.98




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-09_17-36-51
  done: false
  episode_len_mean: 346.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1111
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.875592013200124
          entropy_coeff: 0.009999999999999998
          kl: 0.008696451794857474
          policy_loss: -0.03133699562814501
          total_loss: -0.04894294432467884
          vf_explained_var: -0.50581955909729
          vf_loss: 0.00010521746314932696
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  iterations_since_restore: 441
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,441,9895.13,441000,0,0,0,346.17


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-09_17-37-13
  done: false
  episode_len_mean: 348.01
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1114
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 2.032684670554267
          entropy_coeff: 0.009999999999999998
          kl: 0.0032746091490130622
          policy_loss: 0.00445742474661933
          total_loss: -0.0055780247060789
          vf_explained_var: -0.5365487933158875
          vf_loss: 0.009897999776472311
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
  iterations_since_restore: 442
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,442,9917.47,442000,0.01,1,0,348.01


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-09_17-37-35
  done: false
  episode_len_mean: 350.15
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1116
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.9393280294206408
          entropy_coeff: 0.009999999999999998
          kl: 0.015732615186630123
          policy_loss: -0.02568628357516395
          total_loss: -0.042787277201811476
          vf_explained_var: 0.4561188220977783
          vf_loss: 0.0013472622321892736
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  iterations_since_restore: 443
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,443,9938.72,443000,0.01,1,0,350.15


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-09_17-38-02
  done: false
  episode_len_mean: 347.97
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1119
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.3143485956721837
          entropy_coeff: 0.009999999999999998
          kl: 0.009399912286475498
          policy_loss: 0.013465722650289535
          total_loss: 0.0012899638877974617
          vf_explained_var: 0.39283984899520874
          vf_loss: 0.0004030961730879628
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  iterations_since_restore: 444
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,444,9966.41,444000,0.01,1,0,347.97


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-09_17-38-25
  done: false
  episode_len_mean: 349.26
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1122
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.7882184187571208
          entropy_coeff: 0.009999999999999998
          kl: 0.01865450836415662
          policy_loss: -0.08172121337718434
          total_loss: -0.09826577065719498
          vf_explained_var: 0.6932497620582581
          vf_loss: 0.0002170914392788998
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
  iterations_since_restore: 445
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,445,9989.56,445000,0.01,1,0,349.26


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-09_17-38-50
  done: false
  episode_len_mean: 350.15
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1125
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.8689912133746678
          entropy_coeff: 0.009999999999999998
          kl: 0.018045749918440636
          policy_loss: -0.09106687721278932
          total_loss: -0.10846876775225003
          vf_explained_var: 0.7380120158195496
          vf_loss: 0.0002040491416765791
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  iterations_since_restore: 446
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,446,10013.9,446000,0.01,1,0,350.15


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-09_17-39-14
  done: false
  episode_len_mean: 350.85
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1128
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 2.01289914449056
          entropy_coeff: 0.009999999999999998
          kl: 0.015166645483739326
          policy_loss: 0.06820165134138531
          total_loss: 0.04912999528977606
          vf_explained_var: 0.5222734808921814
          vf_loss: 0.00014630276321743926
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  iterations_since_restore: 447
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,447,10037.9,447000,0.01,1,0,350.85


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-09_17-39-42
  done: false
  episode_len_mean: 351.42
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1131
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.8133036507500542
          entropy_coeff: 0.009999999999999998
          kl: 0.012082946125170264
          policy_loss: 0.03400396505991618
          total_loss: 0.016826879647043016
          vf_explained_var: 0.6780738830566406
          vf_loss: 0.00023015715851215646
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  iterations_since_restore: 448
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,448,10066,448000,0.01,1,0,351.42


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-09_17-40-08
  done: false
  episode_len_mean: 351.72
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1134
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.9414383557107713
          entropy_coeff: 0.009999999999999998
          kl: 0.010941362482822924
          policy_loss: -0.08613848123285506
          total_loss: -0.10471459676822027
          vf_explained_var: 0.5124402642250061
          vf_loss: 0.00018104314787908353
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  iterations_since_restore: 449
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,449,10092.3,449000,0.01,1,0,351.72


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-09_17-40-34
  done: false
  episode_len_mean: 352.7
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1137
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.9656315591600206
          entropy_coeff: 0.009999999999999998
          kl: 0.016597694625438128
          policy_loss: 0.01666417842109998
          total_loss: -0.0018297998441590203
          vf_explained_var: 0.0369882732629776
          vf_loss: 0.00016535058320086036
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  iterations_since_restore: 450
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,450,10118,450000,0.01,1,0,352.7




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-09_17-41-17
  done: false
  episode_len_mean: 354.14
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1140
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.9927511705292595
          entropy_coeff: 0.009999999999999998
          kl: 0.012247895975186118
          policy_loss: -0.02590305449234115
          total_loss: -0.04493282727069325
          vf_explained_var: -0.3488783538341522
          vf_loss: 0.0001620370484791541
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000
  iterations_since_restore: 451
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,451,10160.5,451000,0.01,1,0,354.14


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-09_17-41-42
  done: false
  episode_len_mean: 354.11
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1143
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 2.1498214509752063
          entropy_coeff: 0.009999999999999998
          kl: 0.008495291964314412
          policy_loss: -0.030117812669939466
          total_loss: -0.05094417466057671
          vf_explained_var: -0.5902255177497864
          vf_loss: 0.00016155764375677488
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  iterations_since_restore: 452
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,452,10185.8,452000,0.01,1,0,354.11


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-09_17-42-07
  done: false
  episode_len_mean: 354.09
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1146
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.9879118389553494
          entropy_coeff: 0.009999999999999998
          kl: 0.016931829319147853
          policy_loss: -0.06296837048398124
          total_loss: -0.0816758735312356
          vf_explained_var: -0.43337181210517883
          vf_loss: 0.00015455705926999347
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000
  iterations_since_restore: 453
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,453,10211.2,453000,0.01,1,0,354.09


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-10-09_17-42-33
  done: false
  episode_len_mean: 354.03
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1150
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 2.08501005437639
          entropy_coeff: 0.009999999999999998
          kl: 0.014990890952044684
          policy_loss: -0.048611842923694186
          total_loss: -0.06848731396926774
          vf_explained_var: -0.28432992100715637
          vf_loss: 7.416354431673729e-05
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  iterations_since_restore: 454
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,454,10237.3,454000,0.01,1,0,354.03


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-10-09_17-42-59
  done: false
  episode_len_mean: 354.44
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1153
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 2.2376663102044
          entropy_coeff: 0.009999999999999998
          kl: 0.009421181580436647
          policy_loss: -0.03413349000944032
          total_loss: -0.055888733598921034
          vf_explained_var: -0.45184338092803955
          vf_loss: 5.5509803102419636e-05
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  iterations_since_restore: 455
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,455,10262.7,455000,0.01,1,0,354.44


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-10-09_17-43-25
  done: false
  episode_len_mean: 353.24
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1156
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.8918008857303195
          entropy_coeff: 0.009999999999999998
          kl: 0.009578072245659482
          policy_loss: -0.0045929960906505585
          total_loss: -0.02285449869102902
          vf_explained_var: 0.02206122688949108
          vf_loss: 8.117131033537185e-05
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
  iterations_since_restore: 456
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,456,10288.9,456000,0.01,1,0,353.24


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-10-09_17-43-52
  done: false
  episode_len_mean: 350.84
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1159
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.8447875963317024
          entropy_coeff: 0.009999999999999998
          kl: 0.010495845567741056
          policy_loss: 0.0308304063975811
          total_loss: 0.013135049160983828
          vf_explained_var: -0.5456375479698181
          vf_loss: 0.00012205569615212477
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 457000
  iterations_since_restore: 457
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,457,10315.7,457000,0.01,1,0,350.84


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-10-09_17-44-18
  done: false
  episode_len_mean: 350.16
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1162
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 2.1934904283947416
          entropy_coeff: 0.009999999999999998
          kl: 0.013004205748893188
          policy_loss: 0.04197799128790696
          total_loss: 0.020953824702236386
          vf_explained_var: -0.8058944940567017
          vf_loss: 0.00012960402042760203
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000
  iterations_since_restore: 458
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,458,10341.4,458000,0.01,1,0,350.16


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-10-09_17-44-44
  done: false
  episode_len_mean: 348.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1165
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.8458211713367039
          entropy_coeff: 0.009999999999999998
          kl: 0.019563388906395772
          policy_loss: -0.11999105695221159
          total_loss: -0.1371795129444864
          vf_explained_var: -0.108194500207901
          vf_loss: 9.463086324912082e-05
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000
  iterations_since_restore: 459
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,459,10368,459000,0.01,1,0,348.9


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-10-09_17-45-11
  done: false
  episode_len_mean: 347.09
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1168
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.7782330049408808
          entropy_coeff: 0.009999999999999998
          kl: 0.010617524178707823
          policy_loss: -0.014359873284896215
          total_loss: -0.031404746075471245
          vf_explained_var: 0.4099505543708801
          vf_loss: 9.968698575701435e-05
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
  iterations_since_restore: 460
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,460,10394.3,460000,0.01,1,0,347.09




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-10-09_17-45-55
  done: false
  episode_len_mean: 344.28
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1172
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.9844532105657788
          entropy_coeff: 0.009999999999999998
          kl: 0.010594868683749755
          policy_loss: 0.008359353927274545
          total_loss: -0.010686246740321318
          vf_explained_var: -0.20333336293697357
          vf_loss: 0.00016252557244216505
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000
  iterations_since_restore: 461
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,461,10438.8,461000,0.01,1,0,344.28


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-10-09_17-46-21
  done: false
  episode_len_mean: 342.52
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1175
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 2.0328180697229175
          entropy_coeff: 0.009999999999999998
          kl: 0.017075540987654767
          policy_loss: -0.031022835440105864
          total_loss: -0.05014766907940308
          vf_explained_var: -0.5302733778953552
          vf_loss: 0.00017766023108075994
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 462000
  iterations_since_restore: 462
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,462,10464.5,462000,0.01,1,0,342.52


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-10-09_17-46-48
  done: false
  episode_len_mean: 341.59
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1178
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06006774902343748
          cur_lr: 5.000000000000001e-05
          entropy: 1.7932559649149578
          entropy_coeff: 0.009999999999999998
          kl: 0.023364133544677795
          policy_loss: -0.029875435100661384
          total_loss: -0.04627819649047322
          vf_explained_var: -0.027525313198566437
          vf_loss: 0.00012636692643152653
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 463000
  iterations_since_restore: 463
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,463,10492.2,463000,0.01,1,0,341.59


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-10-09_17-47-16
  done: false
  episode_len_mean: 340.11
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1181
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 1.6184582802984449
          entropy_coeff: 0.009999999999999998
          kl: 0.010978510957228356
          policy_loss: -0.0060015395283699036
          total_loss: -0.020922634667820402
          vf_explained_var: 0.09927268326282501
          vf_loss: 0.00027430526238782075
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 464000
  iterations_since_restore: 464
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,464,10519.5,464000,0.01,1,0,340.11


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-10-09_17-47-41
  done: false
  episode_len_mean: 338.72
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1184
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 1.9801068915261162
          entropy_coeff: 0.009999999999999998
          kl: 0.011590975589373562
          policy_loss: -0.015246873204078939
          total_loss: -0.033875494822859765
          vf_explained_var: -0.7559942603111267
          vf_loss: 0.0001280802834015857
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465000
  iterations_since_restore: 465
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,465,10544.5,465000,0.01,1,0,338.72


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-10-09_17-48-04
  done: false
  episode_len_mean: 339.05
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1187
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 2.173541439904107
          entropy_coeff: 0.009999999999999998
          kl: 0.011862391045977697
          policy_loss: -0.053731841759549245
          total_loss: -0.07427558857533667
          vf_explained_var: -0.5368295907974243
          vf_loss: 0.00012284681417642988
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 466000
  iterations_since_restore: 466
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,466,10568,466000,0.01,1,0,339.05


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-10-09_17-48-29
  done: false
  episode_len_mean: 336.68
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1190
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 1.8421559439765083
          entropy_coeff: 0.009999999999999998
          kl: 0.015480926704361448
          policy_loss: 0.023279894888401032
          total_loss: 0.006392775062057707
          vf_explained_var: -0.2881089448928833
          vf_loss: 0.0001395838328688923
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467000
  iterations_since_restore: 467
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,467,10592.5,467000,0.01,1,0,336.68


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-10-09_17-48-54
  done: false
  episode_len_mean: 335.38
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1193
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 1.7211375733216603
          entropy_coeff: 0.009999999999999998
          kl: 0.01067394128789968
          policy_loss: -0.041640547662973405
          total_loss: -0.05772922221157286
          vf_explained_var: -0.822404146194458
          vf_loss: 0.0001609639436032416
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 468000
  iterations_since_restore: 468
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,468,10618,468000,0.01,1,0,335.38


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-10-09_17-49-19
  done: false
  episode_len_mean: 335.6
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1196
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 1.7804573668373955
          entropy_coeff: 0.009999999999999998
          kl: 0.011620852094503517
          policy_loss: -0.025073491119676165
          total_loss: -0.04171981120275126
          vf_explained_var: -0.4912753701210022
          vf_loss: 0.00011119791161036119
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000
  iterations_since_restore: 469
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,469,10642.4,469000,0.01,1,0,335.6


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-10-09_17-49-45
  done: false
  episode_len_mean: 334.99
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1199
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 1.6221444076961942
          entropy_coeff: 0.009999999999999998
          kl: 0.010270651271057278
          policy_loss: -0.008554815997680027
          total_loss: -0.023694802530937725
          vf_explained_var: -0.5590023994445801
          vf_loss: 0.00015605527462159646
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000
  iterations_since_restore: 470
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,470,10668.3,470000,0.01,1,0,334.99




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-10-09_17-50-24
  done: false
  episode_len_mean: 336.26
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1202
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 2.0750296420521206
          entropy_coeff: 0.009999999999999998
          kl: 0.011363615488361529
          policy_loss: -0.05404717922210693
          total_loss: -0.07358449983100096
          vf_explained_var: -0.8172051310539246
          vf_loss: 0.00018909418269888394
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 471000
  iterations_since_restore: 471
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,471,10707.7,471000,0.01,1,0,336.26


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-10-09_17-50-48
  done: false
  episode_len_mean: 335.85
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1204
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 2.101630218823751
          entropy_coeff: 0.009999999999999998
          kl: 0.01185327769658699
          policy_loss: -0.04515853180653519
          total_loss: -0.06501005612727669
          vf_explained_var: -1.0
          vf_loss: 9.677809462623877e-05
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 472000
  iterations_since_restore: 472
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,472,10731.5,472000,0.01,1,0,335.85


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-10-09_17-51-12
  done: false
  episode_len_mean: 333.45
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1207
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 1.6733544985453288
          entropy_coeff: 0.009999999999999998
          kl: 0.011302742305142032
          policy_loss: -0.0374960840990146
          total_loss: -0.0531154849463039
          vf_explained_var: -0.7163342833518982
          vf_loss: 9.574846472888667e-05
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000
  iterations_since_restore: 473
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,473,10755.6,473000,0.01,1,0,333.45


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-10-09_17-51-36
  done: false
  episode_len_mean: 333.74
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1210
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 1.9780812078052097
          entropy_coeff: 0.009999999999999998
          kl: 0.011290562030325477
          policy_loss: 0.015522994763321347
          total_loss: -0.0031101625826623703
          vf_explained_var: -0.5786213874816895
          vf_loss: 0.0001303576865514818
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000
  iterations_since_restore: 474
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,474,10779.1,474000,0.01,1,0,333.74


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-10-09_17-51-58
  done: false
  episode_len_mean: 334.34
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1212
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 2.0207506484455533
          entropy_coeff: 0.009999999999999998
          kl: 0.009749747737758222
          policy_loss: -0.024391951080825595
          total_loss: -0.04365227934386995
          vf_explained_var: -0.7483714818954468
          vf_loss: 6.871097830298822e-05
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  iterations_since_restore: 475
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,475,10801.9,475000,0.01,1,0,334.34


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-10-09_17-52-20
  done: false
  episode_len_mean: 334.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1215
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 2.0301479246881273
          entropy_coeff: 0.009999999999999998
          kl: 0.00995397386257761
          policy_loss: -0.021480186076627838
          total_loss: -0.04075782754355007
          vf_explained_var: -0.7163305282592773
          vf_loss: 0.00012696582909686388
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476000
  iterations_since_restore: 476
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,476,10823.1,476000,0,0,0,334.24


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-10-09_17-52-41
  done: false
  episode_len_mean: 335.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1217
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 2.0336864524417453
          entropy_coeff: 0.009999999999999998
          kl: 0.010018133671489692
          policy_loss: 0.009323620547850927
          total_loss: -0.009985389229324128
          vf_explained_var: -0.9063633680343628
          vf_loss: 0.000125204777416204
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 477000
  iterations_since_restore: 477
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,477,10844.1,477000,0,0,0,335.39


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-10-09_17-53-03
  done: false
  episode_len_mean: 337.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1220
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09010162353515627
          cur_lr: 5.000000000000001e-05
          entropy: 1.9617074489593507
          entropy_coeff: 0.009999999999999998
          kl: 0.020534243469304294
          policy_loss: 0.030236334933174982
          total_loss: 0.023449485003948212
          vf_explained_var: -0.6553041934967041
          vf_loss: 0.010980058288259898
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 478000
  iterations_since_restore: 478
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,478,10866.1,478000,-0.01,0,-1,337.89


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-10-09_17-53-26
  done: false
  episode_len_mean: 338.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1222
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 1.9678347322675918
          entropy_coeff: 0.009999999999999998
          kl: 0.013856902464414213
          policy_loss: -0.0967021317117744
          total_loss: -0.11429879317680995
          vf_explained_var: 0.06782595068216324
          vf_loss: 0.00020889077091447284
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
  iterations_since_restore: 479
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,479,10889.5,479000,-0.01,0,-1,338.66


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-10-09_17-53-48
  done: false
  episode_len_mean: 338.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1225
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 1.8420290205213758
          entropy_coeff: 0.009999999999999998
          kl: 0.013110084206476379
          policy_loss: -0.0888643273876773
          total_loss: -0.10526330090231366
          vf_explained_var: 0.20519621670246124
          vf_loss: 0.00024945747847798176
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 480000
  iterations_since_restore: 480
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,480,10911.8,480000,-0.01,0,-1,338.82


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-10-09_17-54-11
  done: false
  episode_len_mean: 339.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1228
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 1.9959371169408162
          entropy_coeff: 0.009999999999999998
          kl: 0.016602143631714267
          policy_loss: 0.022808645748429827
          total_loss: 0.005243645111719767
          vf_explained_var: -0.036942485719919205
          vf_loss: 0.00015055117029179303
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 481000
  iterations_since_restore: 481
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,481,10934.6,481000,-0.01,0,-1,339.68




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-10-09_17-54-53
  done: false
  episode_len_mean: 341.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 1230
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 1.8969899151060317
          entropy_coeff: 0.009999999999999998
          kl: 0.018038563375843433
          policy_loss: -0.029216835854782
          total_loss: -0.04560089686678515
          vf_explained_var: -0.2436368763446808
          vf_loss: 0.00014788010707383767
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 482000
  iterations_since_restore: 482
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,482,10976,482000,-0.01,0,-1,341.2


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-10-09_17-55-15
  done: false
  episode_len_mean: 343.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 3
  episodes_total: 1233
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 2.040954959392548
          entropy_coeff: 0.009999999999999998
          kl: 0.01195283328476831
          policy_loss: -0.024853681441810396
          total_loss: -0.04349958904915386
          vf_explained_var: -0.32130032777786255
          vf_loss: 0.0001481863935219331
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 483000
  iterations_since_restore: 483
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,483,10997.9,483000,-0.01,0,-1,343.85


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-10-09_17-55-38
  done: false
  episode_len_mean: 345.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1236
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 1.965387749671936
          entropy_coeff: 0.009999999999999998
          kl: 0.026788656052428195
          policy_loss: 0.02961403396394518
          total_loss: 0.044927839934825894
          vf_explained_var: -0.6109439134597778
          vf_loss: 0.03134712892286997
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484000
  iterations_since_restore: 484
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,484,11020.9,484000,-0.03,0,-2,345.13


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-10-09_17-56-00
  done: false
  episode_len_mean: 346.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1238
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 1.8954002526071336
          entropy_coeff: 0.009999999999999998
          kl: 0.010571371393014199
          policy_loss: -0.15028227219978968
          total_loss: -0.16670248690578673
          vf_explained_var: 0.7758537530899048
          vf_loss: 0.00039066400948084063
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485000
  iterations_since_restore: 485
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,485,11042.9,485000,-0.03,0,-2,346.54


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-10-09_17-56-22
  done: false
  episode_len_mean: 348.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1241
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 1.9540720595253838
          entropy_coeff: 0.009999999999999998
          kl: 0.010218227698326822
          policy_loss: 0.004817003177271949
          total_loss: -0.012415820194615259
          vf_explained_var: 0.555424690246582
          vf_loss: 0.00023636982578965318
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486000
  iterations_since_restore: 486
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,486,11065.8,486000,-0.03,0,-2,348.89


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-10-09_17-56-45
  done: false
  episode_len_mean: 350.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1243
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 1.9279448098606533
          entropy_coeff: 0.009999999999999998
          kl: 0.010213805823752567
          policy_loss: -0.08954066381686264
          total_loss: -0.10656019527879027
          vf_explained_var: 0.5897126197814941
          vf_loss: 0.00018928338621561932
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487000
  iterations_since_restore: 487
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,487,11087.8,487000,-0.03,0,-2,350.53


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-10-09_17-57-05
  done: false
  episode_len_mean: 352.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1246
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 1.8367931021584405
          entropy_coeff: 0.009999999999999998
          kl: 0.009900415087845064
          policy_loss: 0.03237685523927212
          total_loss: 0.01619465094473627
          vf_explained_var: -0.2768970727920532
          vf_loss: 0.00017863014468780925
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  iterations_since_restore: 488
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,488,11108.5,488000,-0.03,0,-2,352.68


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-10-09_17-57-29
  done: false
  episode_len_mean: 353.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1248
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 1.8715436591042414
          entropy_coeff: 0.009999999999999998
          kl: 0.006566391589740227
          policy_loss: -0.046776481428080136
          total_loss: -0.0639927691883511
          vf_explained_var: -0.5975600481033325
          vf_loss: 0.00016795134466115593
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000
  iterations_since_restore: 489
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,489,11132.4,489000,-0.03,0,-2,353.04


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-10-09_17-57-51
  done: false
  episode_len_mean: 355.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1251
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 1.867292214764489
          entropy_coeff: 0.009999999999999998
          kl: 0.007835092724498062
          policy_loss: -0.06455815757314363
          total_loss: -0.08153098275264105
          vf_explained_var: -0.3875837028026581
          vf_loss: 0.00011169871031597722
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  iterations_since_restore: 490
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,490,11154.4,490000,-0.03,0,-2,355.04


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-10-09_17-58-13
  done: false
  episode_len_mean: 356.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1253
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 1.8708825363053216
          entropy_coeff: 0.009999999999999998
          kl: 0.00978402781162081
          policy_loss: -0.006050688442256716
          total_loss: -0.022596681490540504
          vf_explained_var: -0.278639554977417
          vf_loss: 0.00017932881989205876
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  iterations_since_restore: 491
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,491,11175.8,491000,-0.03,0,-2,356.19


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-10-09_17-58-35
  done: false
  episode_len_mean: 358.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1256
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 1.8642119103007846
          entropy_coeff: 0.009999999999999998
          kl: 0.025833023338915694
          policy_loss: -0.03328143357195788
          total_loss: -0.046461743995961216
          vf_explained_var: -0.4709561765193939
          vf_loss: 0.00022471395358378586
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  iterations_since_restore: 492
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,492,11198,492000,-0.03,0,-2,358.88


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-10-09_17-58-56
  done: false
  episode_len_mean: 360.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1258
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8456408593389724
          entropy_coeff: 0.009999999999999998
          kl: 0.008486976161423203
          policy_loss: -0.029558578216367298
          total_loss: -0.04527211553520626
          vf_explained_var: -0.2384376972913742
          vf_loss: 0.00016204048960288573
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000
  iterations_since_restore: 493
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,493,11218.7,493000,-0.03,0,-2,360.71




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-10-09_17-59-36
  done: false
  episode_len_mean: 362.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1261
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.927603464656406
          entropy_coeff: 0.009999999999999998
          kl: 0.010863747587061419
          policy_loss: 0.012272036655081643
          total_loss: -0.003519730476869477
          vf_explained_var: -0.8039476871490479
          vf_loss: 0.00018067956294140055
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000
  iterations_since_restore: 494
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,494,11259.2,494000,-0.03,0,-2,362.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-10-09_17-59-59
  done: false
  episode_len_mean: 364.02
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1264
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.977193378077613
          entropy_coeff: 0.009999999999999998
          kl: 0.00932986831760516
          policy_loss: 0.009973694880803427
          total_loss: -0.006787752442889743
          vf_explained_var: -1.0
          vf_loss: 0.00017333785346838543
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 495000
  iterations_since_restore: 495
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,495,11282.6,495000,-0.03,0,-2,364.02


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-10-09_18-00-22
  done: false
  episode_len_mean: 365.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1266
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.938465326362186
          entropy_coeff: 0.009999999999999998
          kl: 0.007094061877686543
          policy_loss: -0.09121195148262713
          total_loss: -0.10832868038366238
          vf_explained_var: -0.8180835247039795
          vf_loss: 0.00011066859014034789
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 496000
  iterations_since_restore: 496
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,496,11304.6,496000,-0.03,0,-2,365.55


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-10-09_18-00-45
  done: false
  episode_len_mean: 367.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1269
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.928679683473375
          entropy_coeff: 0.009999999999999998
          kl: 0.010917037483054375
          policy_loss: -0.03268705668548743
          total_loss: -0.04851673804223537
          vf_explained_var: -0.9952496886253357
          vf_loss: 0.00013732295014455708
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000
  iterations_since_restore: 497
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,497,11327.9,497000,-0.03,0,-2,367.18


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-10-09_18-01-06
  done: false
  episode_len_mean: 370.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1272
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7607002351019116
          entropy_coeff: 0.009999999999999998
          kl: 0.011217946968452205
          policy_loss: -0.01349127023584313
          total_loss: -0.027580098062753678
          vf_explained_var: -0.23568294942378998
          vf_loss: 0.00010687537712025611
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 498000
  iterations_since_restore: 498


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,498,11349.5,498000,-0.03,0,-2,370.33


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-10-09_18-01-28
  done: false
  episode_len_mean: 372.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1274
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9992495205667284
          entropy_coeff: 0.009999999999999998
          kl: 0.00732112619636249
          policy_loss: -0.07186120324250725
          total_loss: -0.08948164766447411
          vf_explained_var: -0.729261040687561
          vf_loss: 0.00014574744475087046
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
  iterations_since_restore: 499
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,499,11371.1,499000,-0.03,0,-2,372.33


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-10-09_18-01-51
  done: false
  episode_len_mean: 374.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1277
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.982496032449934
          entropy_coeff: 0.009999999999999998
          kl: 0.007563971971904277
          policy_loss: -0.08594619805614154
          total_loss: -0.10338130998942587
          vf_explained_var: -0.12748372554779053
          vf_loss: 8.969750465944849e-05
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500000
  iterations_since_restore: 500
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,500,11394,500000,-0.03,0,-2,374.24


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-10-09_18-02-12
  done: false
  episode_len_mean: 377.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1279
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.109001061651442
          entropy_coeff: 0.009999999999999998
          kl: 0.009129082858445593
          policy_loss: -0.12093195352289411
          total_loss: -0.13907989511887234
          vf_explained_var: -1.0
          vf_loss: 0.00016597815912165162
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 501000
  iterations_since_restore: 501
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,501,11415.4,501000,-0.03,0,-2,377.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-10-09_18-02-35
  done: false
  episode_len_mean: 379.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1282
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8937624335289
          entropy_coeff: 0.009999999999999998
          kl: 0.00968196926003801
          policy_loss: -0.06667740932769245
          total_loss: -0.08246769317322307
          vf_explained_var: -0.5476077795028687
          vf_loss: 0.00020311993850757263
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000
  iterations_since_restore: 502
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,502,11438.1,502000,-0.03,0,-2,379.21


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-10-09_18-02-57
  done: false
  episode_len_mean: 381.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1284
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.0146983636750115
          entropy_coeff: 0.009999999999999998
          kl: 0.008406772078993402
          policy_loss: -0.05729822917944855
          total_loss: -0.0746305676177144
          vf_explained_var: -1.0
          vf_loss: 0.0002582029895468925
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503000
  iterations_since_restore: 503
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,503,11459.7,503000,-0.03,0,-2,381.09


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-10-09_18-03-19
  done: false
  episode_len_mean: 381.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1287
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8391591919793022
          entropy_coeff: 0.009999999999999998
          kl: 0.008562824155669209
          policy_loss: 0.07685695971465772
          total_loss: 0.06127461550964249
          vf_explained_var: -0.7533277869224548
          vf_loss: 0.00020535145393094152
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504000
  iterations_since_restore: 504
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,504,11482.3,504000,-0.03,0,-2,381.6


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-10-09_18-03-43
  done: false
  episode_len_mean: 381.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1289
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9941617276933459
          entropy_coeff: 0.009999999999999998
          kl: 0.0066850405678210755
          policy_loss: -0.09087777193635702
          total_loss: -0.10867480906761355
          vf_explained_var: -1.0
          vf_loss: 0.00011170629812921915
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000
  iterations_since_restore: 505
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,505,11505.9,505000,-0.03,0,-2,381.85




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-10-09_18-04-21
  done: false
  episode_len_mean: 383.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1292
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9898854573567708
          entropy_coeff: 0.009999999999999998
          kl: 0.009672464064140879
          policy_loss: -0.022649069420165487
          total_loss: -0.0394686219178968
          vf_explained_var: -0.652441680431366
          vf_loss: 0.00013797233639504863
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000
  iterations_since_restore: 506
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,506,11544.2,506000,-0.03,0,-2,383.52


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-10-09_18-04-44
  done: false
  episode_len_mean: 385.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1294
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9726806203524272
          entropy_coeff: 0.009999999999999998
          kl: 0.009563107681710864
          policy_loss: -0.07806181328164206
          total_loss: -0.09473998703890377
          vf_explained_var: -0.8023274540901184
          vf_loss: 0.00014055750200188616
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000
  iterations_since_restore: 507
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,507,11566.7,507000,-0.03,0,-2,385.04


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-10-09_18-05-06
  done: false
  episode_len_mean: 386.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1297
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.0525240593486362
          entropy_coeff: 0.009999999999999998
          kl: 0.017455980678318116
          policy_loss: -0.005918734106752608
          total_loss: -0.020951338443491195
          vf_explained_var: -1.0
          vf_loss: 0.00018439600575624758
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000
  iterations_since_restore: 508
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,508,11588.4,508000,-0.03,0,-2,386.36


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-10-09_18-05-28
  done: false
  episode_len_mean: 388.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1300
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7673344254493712
          entropy_coeff: 0.009999999999999998
          kl: 0.011154669727423638
          policy_loss: -0.02769115360246764
          total_loss: -0.04167950054009755
          vf_explained_var: -0.9492846727371216
          vf_loss: 0.00029293843318656503
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000
  iterations_since_restore: 509
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,509,11610.6,509000,-0.03,0,-2,388.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-10-09_18-05-50
  done: false
  episode_len_mean: 387.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1302
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.0817722850375704
          entropy_coeff: 0.009999999999999998
          kl: 0.010331889628439781
          policy_loss: -0.08550549579991235
          total_loss: -0.10302148622771104
          vf_explained_var: -0.8728621006011963
          vf_loss: 0.00015987798162516104
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000
  iterations_since_restore: 510
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,510,11632.8,510000,-0.03,0,-2,387.7


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-10-09_18-06-12
  done: false
  episode_len_mean: 388.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1305
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9119209461741977
          entropy_coeff: 0.009999999999999998
          kl: 0.01444550676669982
          policy_loss: -0.04302084238992797
          total_loss: -0.057502912481625874
          vf_explained_var: -0.7661482691764832
          vf_loss: 0.000244359698182153
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511000
  iterations_since_restore: 511
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,511,11654.6,511000,-0.03,0,-2,388.64


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-10-09_18-06-37
  done: false
  episode_len_mean: 387.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1308
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8779076841142444
          entropy_coeff: 0.009999999999999998
          kl: 0.009862364494201033
          policy_loss: -0.005019678278929658
          total_loss: -0.020560545639859304
          vf_explained_var: -0.5727751851081848
          vf_loss: 0.00023913468725772366
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
  iterations_since_restore: 512


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,512,11679.8,512000,-0.03,0,-2,387.68


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-10-09_18-07-01
  done: false
  episode_len_mean: 387.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1311
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.133110476864709
          entropy_coeff: 0.009999999999999998
          kl: 0.008489428690613367
          policy_loss: -0.046818869271212156
          total_loss: -0.06543399509456423
          vf_explained_var: -0.7560714483261108
          vf_loss: 0.0001344011307891277
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000
  iterations_since_restore: 513
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,513,11703.5,513000,-0.03,0,-2,387.26


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-10-09_18-07-24
  done: false
  episode_len_mean: 386.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1313
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.1557595200008817
          entropy_coeff: 0.009999999999999998
          kl: 0.01047952804361467
          policy_loss: -0.00500803180038929
          total_loss: -0.02322229018641843
          vf_explained_var: -0.535767138004303
          vf_loss: 0.00015658490964496095
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 514000
  iterations_since_restore: 514
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,514,11726.3,514000,-0.03,0,-2,386.7


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-10-09_18-07-46
  done: false
  episode_len_mean: 386.18
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1316
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.066214752197266
          entropy_coeff: 0.009999999999999998
          kl: 0.01236920443821688
          policy_loss: 0.07009722102019522
          total_loss: 0.05334332262476285
          vf_explained_var: -0.37906304001808167
          vf_loss: 0.00014685879941680468
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515000
  iterations_since_restore: 515
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,515,11748.6,515000,-0.03,0,-2,386.18


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-10-09_18-08-08
  done: false
  episode_len_mean: 385.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1318
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.0033093319998847
          entropy_coeff: 0.009999999999999998
          kl: 0.01370913809335899
          policy_loss: -0.050652140213383566
          total_loss: -0.06636414229869843
          vf_explained_var: -0.42157742381095886
          vf_loss: 0.00015224099665323996
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 516000
  iterations_since_restore: 516
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,516,11771,516000,-0.03,0,-2,385.79




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-10-09_18-08-49
  done: false
  episode_len_mean: 384.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1321
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9546663377020095
          entropy_coeff: 0.009999999999999998
          kl: 0.016555113461921976
          policy_loss: -0.07079720276718339
          total_loss: -0.08508734525077873
          vf_explained_var: -0.488555908203125
          vf_loss: 0.00022222747955109097
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000
  iterations_since_restore: 517
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,517,11811.3,517000,-0.02,0,-2,384.4


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-10-09_18-09-11
  done: false
  episode_len_mean: 384.39
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1324
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.0490801970163983
          entropy_coeff: 0.009999999999999998
          kl: 0.012597955045648727
          policy_loss: -0.01205539374301831
          total_loss: -0.028556744340393278
          vf_explained_var: -1.0
          vf_loss: 0.00015850190862288905
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000
  iterations_since_restore: 518
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,518,11833.3,518000,-0.02,0,-2,384.39


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-10-09_18-09-35
  done: false
  episode_len_mean: 383.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1327
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.1051625185542635
          entropy_coeff: 0.009999999999999998
          kl: 0.008783646914337358
          policy_loss: -0.06016969204776817
          total_loss: -0.07835999743805991
          vf_explained_var: -0.32022368907928467
          vf_loss: 0.0001902722378468348
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000
  iterations_since_restore: 519
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,519,11857.3,519000,-0.02,0,-2,383.92


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-10-09_18-09-56
  done: false
  episode_len_mean: 384.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 1329
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.042126860883501
          entropy_coeff: 0.009999999999999998
          kl: 0.00993470443283149
          policy_loss: 0.03483097648455037
          total_loss: 0.017578681682546934
          vf_explained_var: -0.9319359660148621
          vf_loss: 0.00014790307646358593
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000
  iterations_since_restore: 520
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,520,11878.2,520000,-0.02,0,-2,384.25


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-10-09_18-10-18
  done: false
  episode_len_mean: 384.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 3
  episodes_total: 1332
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9992787387635973
          entropy_coeff: 0.009999999999999998
          kl: 0.009845182094176232
          policy_loss: -0.055825716795192826
          total_loss: -0.07268505932556259
          vf_explained_var: -0.6206755638122559
          vf_loss: 0.00013959266596227988
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 521000
  iterations_since_restore: 521
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,521,11900.7,521000,-0.02,0,-2,384.61


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-10-09_18-10-40
  done: false
  episode_len_mean: 384.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1334
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.0852510770161947
          entropy_coeff: 0.009999999999999998
          kl: 0.007712204655930854
          policy_loss: -0.016212834790349007
          total_loss: -0.034526223896278276
          vf_explained_var: -0.6202932000160217
          vf_loss: 0.00019388948736983973
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000
  iterations_since_restore: 522
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,522,11922.7,522000,0,0,0,384.28


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-10-09_18-11-03
  done: false
  episode_len_mean: 384.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1337
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9549530731307136
          entropy_coeff: 0.009999999999999998
          kl: 0.011813401863602893
          policy_loss: -0.08018293132384618
          total_loss: -0.09598400125073062
          vf_explained_var: -0.6141341924667358
          vf_loss: 0.00015609226013313875
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 523000
  iterations_since_restore: 523
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,523,11945.5,523000,0,0,0,384.3


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-10-09_18-11-25
  done: false
  episode_len_mean: 384.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1340
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7075801756646898
          entropy_coeff: 0.009999999999999998
          kl: 0.01136713655576123
          policy_loss: -0.03651740292294158
          total_loss: -0.05000641865448819
          vf_explained_var: -0.9625991582870483
          vf_loss: 0.000130120131749815
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524000
  iterations_since_restore: 524
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,524,11967.1,524000,0,0,0,384.5


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-10-09_18-11-47
  done: false
  episode_len_mean: 384.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1342
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.0524352908134462
          entropy_coeff: 0.009999999999999998
          kl: 0.01004569376606494
          policy_loss: -0.027176740517218908
          total_loss: -0.04453759027851952
          vf_explained_var: -1.0
          vf_loss: 0.00010867844507124068
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
  iterations_since_restore: 525
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,525,11989.8,525000,0,0,0,384.09


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-10-09_18-12-08
  done: false
  episode_len_mean: 384.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1345
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9903757360246446
          entropy_coeff: 0.009999999999999998
          kl: 0.011805462168532286
          policy_loss: -0.07643341142684221
          total_loss: -0.09265258188048998
          vf_explained_var: -0.78813236951828
          vf_loss: 9.463052269388249e-05
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000
  iterations_since_restore: 526
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,526,12010.3,526000,0,0,0,384.75


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-10-09_18-12-30
  done: false
  episode_len_mean: 384.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1347
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9152963015768263
          entropy_coeff: 0.009999999999999998
          kl: 0.008201522243700094
          policy_loss: 0.001217634841385815
          total_loss: -0.015201089469095071
          vf_explained_var: -1.0
          vf_loss: 0.00024021536228246987
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000
  iterations_since_restore: 527
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,527,12032.3,527000,0,0,0,384.38




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-10-09_18-13-09
  done: false
  episode_len_mean: 384.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1350
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.0568999422921075
          entropy_coeff: 0.009999999999999998
          kl: 0.009673489947813883
          policy_loss: 0.043287428468465804
          total_loss: 0.025817278979553118
          vf_explained_var: -0.9894541501998901
          vf_loss: 0.0001572094587673847
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000
  iterations_since_restore: 528
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,528,12071.1,528000,0,0,0,384.67


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-10-09_18-13-29
  done: false
  episode_len_mean: 385.23
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1352
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9624014099438984
          entropy_coeff: 0.009999999999999998
          kl: 0.009881252450924964
          policy_loss: -0.006708382732338375
          total_loss: -0.023171363605393305
          vf_explained_var: -1.0
          vf_loss: 0.00015621542851375933
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 529000
  iterations_since_restore: 529
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,529,12091.4,529000,0,0,0,385.23


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-10-09_18-13-49
  done: false
  episode_len_mean: 385.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1355
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8245330188009474
          entropy_coeff: 0.009999999999999998
          kl: 0.008958459945126802
          policy_loss: -0.002476801143752204
          total_loss: -0.01782913777149386
          vf_explained_var: -0.9996849894523621
          vf_loss: 0.00016878711329708392
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000
  iterations_since_restore: 530
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,530,12111.8,530000,0,0,0,385.69


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-10-09_18-14-10
  done: false
  episode_len_mean: 386.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1357
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.044090900156233
          entropy_coeff: 0.009999999999999998
          kl: 0.009815308963053503
          policy_loss: -0.00043607960558599896
          total_loss: -0.01773984879255295
          vf_explained_var: -1.0
          vf_loss: 0.00015237369346626412
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000
  iterations_since_restore: 531
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,531,12132.1,531000,0,0,0,386.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-10-09_18-14-31
  done: false
  episode_len_mean: 386.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1359
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8250451776716443
          entropy_coeff: 0.009999999999999998
          kl: 0.01167043157821897
          policy_loss: -0.04719805692632993
          total_loss: -0.06175270173698664
          vf_explained_var: -0.999273419380188
          vf_loss: 0.0001469088274461683
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
  iterations_since_restore: 532
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,532,12153.1,532000,0,0,0,386.82


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-10-09_18-14-55
  done: false
  episode_len_mean: 386.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1362
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.821203476852841
          entropy_coeff: 0.009999999999999998
          kl: 0.009268304645189534
          policy_loss: 0.007795010714067353
          total_loss: -0.007466470532947116
          vf_explained_var: -1.0
          vf_loss: 0.00013212497756790576
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
  iterations_since_restore: 533
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,533,12177,533000,0,0,0,386.93


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-10-09_18-15-15
  done: false
  episode_len_mean: 387.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1364
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.856311900085873
          entropy_coeff: 0.009999999999999998
          kl: 0.012108029217302037
          policy_loss: -0.05760965227252907
          total_loss: -0.07221726253628731
          vf_explained_var: -1.0
          vf_loss: 0.0002735432475876425
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  iterations_since_restore: 534
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,534,12197.4,534000,0,0,0,387.84


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-10-09_18-15-38
  done: false
  episode_len_mean: 387.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1367
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9481301758024427
          entropy_coeff: 0.009999999999999998
          kl: 0.010343906218704724
          policy_loss: -0.05461048943301042
          total_loss: -0.07076058321528965
          vf_explained_var: -1.0
          vf_loss: 0.00018569946065933134
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  iterations_since_restore: 535
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,535,12220.2,535000,0,0,0,387.9


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-10-09_18-15-59
  done: false
  episode_len_mean: 388.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1369
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8447206179300943
          entropy_coeff: 0.009999999999999998
          kl: 0.009208061197148254
          policy_loss: 0.009497135256727537
          total_loss: -0.006038643129997783
          vf_explained_var: -0.6874533891677856
          vf_loss: 0.00011132022607651177
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000
  iterations_since_restore: 536
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,536,12241.4,536000,0,0,0,388.74


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-10-09_18-16-23
  done: false
  episode_len_mean: 388.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1372
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8911737269825406
          entropy_coeff: 0.009999999999999998
          kl: 0.009581566782987968
          policy_loss: -0.03575632588730918
          total_loss: -0.051534044908152686
          vf_explained_var: -0.5446764826774597
          vf_loss: 0.00022033251298125833
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 537000
  iterations_since_restore: 537
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,537,12264.9,537000,0,0,0,388.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-10-09_18-16-44
  done: false
  episode_len_mean: 387.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1375
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.797194254398346
          entropy_coeff: 0.009999999999999998
          kl: 0.013246448130947306
          policy_loss: 0.04679078339702553
          total_loss: 0.0330796979367733
          vf_explained_var: -0.8373427391052246
          vf_loss: 0.00023270325610711654
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 538000
  iterations_since_restore: 538
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,538,12286.3,538000,0,0,0,387.45


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-10-09_18-17-07
  done: false
  episode_len_mean: 386.89
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1377
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9290532721413507
          entropy_coeff: 0.009999999999999998
          kl: 0.01453959498962484
          policy_loss: 0.003919476229283545
          total_loss: -0.010778426089220576
          vf_explained_var: -0.7414442896842957
          vf_loss: 0.00017124088675094147
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 539000
  iterations_since_restore: 539
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,539,12309.6,539000,0,0,0,386.89




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-10-09_18-17-48
  done: false
  episode_len_mean: 384.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1380
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8970601386494106
          entropy_coeff: 0.009999999999999998
          kl: 0.009689823058732077
          policy_loss: -0.06652134066033695
          total_loss: -0.0823913660314348
          vf_explained_var: -0.6865550875663757
          vf_loss: 0.00015396833995408895
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 540000
  iterations_since_restore: 540
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,540,12349.7,540000,0,0,0,384.96


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-10-09_18-18-10
  done: false
  episode_len_mean: 385.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1383
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8223902450667486
          entropy_coeff: 0.009999999999999998
          kl: 0.008386696132681613
          policy_loss: -0.015562566556036472
          total_loss: -0.031082256655726167
          vf_explained_var: -0.46738848090171814
          vf_loss: 0.00015387705821518063
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541000
  iterations_since_restore: 541
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,541,12371.7,541000,0,0,0,385.7


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-10-09_18-18-32
  done: false
  episode_len_mean: 385.21
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1385
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8563162273830838
          entropy_coeff: 0.009999999999999998
          kl: 0.007098352380508801
          policy_loss: -0.01398759393228425
          total_loss: -0.0302026589297586
          vf_explained_var: -0.7306466102600098
          vf_loss: 0.00018953681994591737
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000
  iterations_since_restore: 542
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,542,12393.9,542000,0,0,0,385.21


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-10-09_18-18-54
  done: false
  episode_len_mean: 385.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1388
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8792046507199605
          entropy_coeff: 0.009999999999999998
          kl: 0.010679598295964135
          policy_loss: -0.05302681128184001
          total_loss: -0.0684132950173484
          vf_explained_var: -0.9041951298713684
          vf_loss: 0.00015797278134010008
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 543000
  iterations_since_restore: 543
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,543,12416.5,543000,0,0,0,385.08


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-10-09_18-19-17
  done: false
  episode_len_mean: 385.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1391
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8391053981251186
          entropy_coeff: 0.009999999999999998
          kl: 0.012225656720883845
          policy_loss: -0.06329420951919423
          total_loss: -0.07780010938230487
          vf_explained_var: -1.0
          vf_loss: 0.0001674163670233813
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000
  iterations_since_restore: 544
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,544,12439.2,544000,0,0,0,385.07


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-10-09_18-19-37
  done: false
  episode_len_mean: 385.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1393
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9002250512441
          entropy_coeff: 0.009999999999999998
          kl: 0.011906249935778766
          policy_loss: -0.018320196639332507
          total_loss: -0.03342199998183383
          vf_explained_var: -1.0
          vf_loss: 0.0002798405008636312
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000
  iterations_since_restore: 545
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,545,12459.4,545000,0,0,0,385.67


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-10-09_18-19-58
  done: false
  episode_len_mean: 386.35
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1395
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9084676305452983
          entropy_coeff: 0.009999999999999998
          kl: 0.010649295039860441
          policy_loss: -0.06472128998074267
          total_loss: -0.08027870315644477
          vf_explained_var: -1.0
          vf_loss: 0.00028888605108174184
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 546000
  iterations_since_restore: 546
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,546,12480,546000,0,0,0,386.35


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-10-09_18-20-17
  done: false
  episode_len_mean: 387.46
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1397
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.873881623480055
          entropy_coeff: 0.009999999999999998
          kl: 0.009917927662038235
          policy_loss: -0.11015058697925674
          total_loss: -0.1256785096393691
          vf_explained_var: -1.0
          vf_loss: 0.00019492241570131025
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000
  iterations_since_restore: 547
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,547,12498.8,547000,0,0,0,387.46


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-10-09_18-20-39
  done: false
  episode_len_mean: 388.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1400
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.614620245827569
          entropy_coeff: 0.009999999999999998
          kl: 0.011734531574611514
          policy_loss: -0.03614903071688281
          total_loss: -0.048550260812044145
          vf_explained_var: -0.7241076827049255
          vf_loss: 0.00017658216109137155
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 548000
  iterations_since_restore: 548
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,548,12520.6,548000,0,0,0,388.4


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-10-09_18-20-59
  done: false
  episode_len_mean: 388.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1402
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7624740256203546
          entropy_coeff: 0.009999999999999998
          kl: 0.008146594846750371
          policy_loss: -0.055013928117437494
          total_loss: -0.0699237147346139
          vf_explained_var: -0.9556871056556702
          vf_loss: 0.0002376313069059203
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 549000
  iterations_since_restore: 549
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,549,12541.2,549000,0,0,0,388.9


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-10-09_18-21-20
  done: false
  episode_len_mean: 389.85
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1405
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.660128927230835
          entropy_coeff: 0.009999999999999998
          kl: 0.010911954874447795
          policy_loss: -0.027652154945664937
          total_loss: -0.040748710309465724
          vf_explained_var: -0.9986883997917175
          vf_loss: 0.00018648289850615484
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 550000
  iterations_since_restore: 550
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,550,12562.4,550000,0,0,0,389.85


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-10-09_18-21-41
  done: false
  episode_len_mean: 391.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1407
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.4460546718703375
          entropy_coeff: 0.009999999999999998
          kl: 0.0071607794985180506
          policy_loss: -0.0661852935122119
          total_loss: -0.07826558608147834
          vf_explained_var: -1.0
          vf_loss: 0.00020270910220763957
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 551000
  iterations_since_restore: 551
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,551,12582.6,551000,0,0,0,391.81




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-10-09_18-22-19
  done: false
  episode_len_mean: 393.16
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1410
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.5702924251556396
          entropy_coeff: 0.009999999999999998
          kl: 0.008661427086169718
          policy_loss: -0.035889260884788304
          total_loss: -0.04880929332640436
          vf_explained_var: -0.9942392110824585
          vf_loss: 0.00014901252798154018
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552000
  iterations_since_restore: 552
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,552,12621.1,552000,0,0,0,393.16


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-10-09_18-22-41
  done: false
  episode_len_mean: 393.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1412
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6270660003026327
          entropy_coeff: 0.009999999999999998
          kl: 0.008899996099940743
          policy_loss: -0.03412623343368371
          total_loss: -0.04751347659362687
          vf_explained_var: -0.9766271114349365
          vf_loss: 0.00017698937242837726
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553000
  iterations_since_restore: 553
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,553,12642.6,553000,0,0,0,393.55


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-10-09_18-23-03
  done: false
  episode_len_mean: 393.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1415
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.541213877995809
          entropy_coeff: 0.009999999999999998
          kl: 0.009033391445844903
          policy_loss: -0.07741014435887336
          total_loss: -0.0899755410850048
          vf_explained_var: -0.9568971395492554
          vf_loss: 9.975096355548076e-05
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000
  iterations_since_restore: 554
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,554,12664.5,554000,0,0,0,393.49


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-10-09_18-23-23
  done: false
  episode_len_mean: 393.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1417
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.5228526605500114
          entropy_coeff: 0.009999999999999998
          kl: 0.008964660366903882
          policy_loss: -0.011754664199219809
          total_loss: -0.024155711755156517
          vf_explained_var: -0.9799001216888428
          vf_loss: 0.00010138857109672648
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 555000
  iterations_since_restore: 555
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,555,12684.9,555000,0,0,0,393.38


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-10-09_18-23-45
  done: false
  episode_len_mean: 394.64
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1420
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.5726435396406386
          entropy_coeff: 0.009999999999999998
          kl: 0.01056052406738098
          policy_loss: -0.022031203698780803
          total_loss: -0.03437952581379149
          vf_explained_var: -0.1149015724658966
          vf_loss: 0.00016673295588892262
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 556000
  iterations_since_restore: 556
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,556,12706.8,556000,0,0,0,394.64


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-10-09_18-24-07
  done: false
  episode_len_mean: 395.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1423
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6129961411158245
          entropy_coeff: 0.009999999999999998
          kl: 0.011174999274599577
          policy_loss: 0.023322202399786975
          total_loss: 0.010798444195340077
          vf_explained_var: -0.5465312600135803
          vf_loss: 0.00020796220099631075
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000
  iterations_since_restore: 557
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,557,12728.6,557000,0,0,0,395.08


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-10-09_18-24-27
  done: false
  episode_len_mean: 395.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1425
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8452752974298265
          entropy_coeff: 0.009999999999999998
          kl: 0.011893359757895737
          policy_loss: -0.048855351077185735
          total_loss: -0.06355612894727124
          vf_explained_var: -0.4300387501716614
          vf_loss: 0.0001352874536678428
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 558000
  iterations_since_restore: 558
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,558,12749.2,558000,0,0,0,395.86


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-10-09_18-24-50
  done: false
  episode_len_mean: 395.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1428
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.5499713606304593
          entropy_coeff: 0.009999999999999998
          kl: 0.01141965283674106
          policy_loss: -0.025520221640666325
          total_loss: -0.03740296198262109
          vf_explained_var: -0.5487781167030334
          vf_loss: 0.00014433779805484746
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 559000
  iterations_since_restore: 559
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,559,12772.2,559000,0,0,0,395.32


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-10-09_18-25-15
  done: false
  episode_len_mean: 395.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1431
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.686185442076789
          entropy_coeff: 0.009999999999999998
          kl: 0.009008846274287703
          policy_loss: -0.0919784025185638
          total_loss: -0.10600811392068862
          vf_explained_var: -0.7740691304206848
          vf_loss: 9.261712617040354e-05
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 560000
  iterations_since_restore: 560
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,560,12796.3,560000,0,0,0,395.2


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-10-09_18-25-40
  done: false
  episode_len_mean: 392.94
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1434
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.679256100124783
          entropy_coeff: 0.009999999999999998
          kl: 0.015786123141022033
          policy_loss: -0.04092238582670689
          total_loss: -0.052763895814617476
          vf_explained_var: -0.9008462429046631
          vf_loss: 0.00015060024610041484
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 561000
  iterations_since_restore: 561
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,561,12821.8,561000,0,0,0,392.94


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-10-09_18-26-01
  done: false
  episode_len_mean: 393.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1436
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8084465861320496
          entropy_coeff: 0.009999999999999998
          kl: 0.00847308674006043
          policy_loss: -0.061718675090620914
          total_loss: -0.07707334319129586
          vf_explained_var: -0.9474290013313293
          vf_loss: 0.0001531916790150313
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562000
  iterations_since_restore: 562
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,562,12842.8,562000,0,0,0,393.51


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-10-09_18-26-23
  done: false
  episode_len_mean: 394.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1439
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8004954165882534
          entropy_coeff: 0.009999999999999998
          kl: 0.010653559806683453
          policy_loss: -0.06656418417890866
          total_loss: -0.08121585970123608
          vf_explained_var: -0.7537735104560852
          vf_loss: 0.00011360527868317958
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563000
  iterations_since_restore: 563
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,563,12865.1,563000,0,0,0,394.1




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-10-09_18-27-05
  done: false
  episode_len_mean: 393.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1441
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7386241952578227
          entropy_coeff: 0.009999999999999998
          kl: 0.008724361851771611
          policy_loss: 0.06911139190196991
          total_loss: 0.05451837993330426
          vf_explained_var: -0.550580620765686
          vf_loss: 0.00014021248656111615
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 564000
  iterations_since_restore: 564
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,564,12906.6,564000,0,0,0,393.93


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-10-09_18-27-28
  done: false
  episode_len_mean: 392.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1444
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7156676358646816
          entropy_coeff: 0.009999999999999998
          kl: 0.014223739832522043
          policy_loss: -0.03173539609544807
          total_loss: -0.044376802258193494
          vf_explained_var: -0.21322983503341675
          vf_loss: 0.0001899337624005663
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 565000
  iterations_since_restore: 565
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,565,12930,565000,0,0,0,392.29


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-10-09_18-27-52
  done: false
  episode_len_mean: 390.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1447
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.5913733734024895
          entropy_coeff: 0.009999999999999998
          kl: 0.008716060874255721
          policy_loss: -0.08545620731181569
          total_loss: -0.09853331231408649
          vf_explained_var: -0.4443170726299286
          vf_loss: 0.0001861335416227424
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 566000
  iterations_since_restore: 566
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,566,12953.5,566000,0,0,0,390.22


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-10-09_18-28-13
  done: false
  episode_len_mean: 390.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1449
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.848844658003913
          entropy_coeff: 0.009999999999999998
          kl: 0.008040195971698782
          policy_loss: -0.027049206073085467
          total_loss: -0.04292248828957478
          vf_explained_var: -0.9992870688438416
          vf_loss: 0.00017019850112976403
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567000
  iterations_since_restore: 567
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,567,12975,567000,0,0,0,390.98


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-10-09_18-28-36
  done: false
  episode_len_mean: 389.98
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1452
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.5874826934602526
          entropy_coeff: 0.009999999999999998
          kl: 0.012221281551503399
          policy_loss: -0.07852260321378708
          total_loss: -0.09047836814489629
          vf_explained_var: -0.2047935128211975
          vf_loss: 0.0002026526094090918
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 568000
  iterations_since_restore: 568
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,568,12997.8,568000,0,0,0,389.98


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-10-09_18-28-54
  done: false
  episode_len_mean: 391.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1454
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8621554030312433
          entropy_coeff: 0.009999999999999998
          kl: 0.011236296467245112
          policy_loss: -0.013164308791359266
          total_loss: -0.02824096091919475
          vf_explained_var: -0.8963847756385803
          vf_loss: 0.00012802189553945534
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000
  iterations_since_restore: 569
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,569,13015.8,569000,0,0,0,391.13


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-10-09_18-29-16
  done: false
  episode_len_mean: 390.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1457
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7269093010160659
          entropy_coeff: 0.009999999999999998
          kl: 0.014606784753302944
          policy_loss: -0.0670205140279399
          total_loss: -0.07957795908053716
          vf_explained_var: -0.722243070602417
          vf_loss: 0.0002698270722046598
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570000
  iterations_since_restore: 570
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,570,13037.8,570000,0,0,0,390.86


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-10-09_18-29-42
  done: false
  episode_len_mean: 387.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1460
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.4937816076808506
          entropy_coeff: 0.009999999999999998
          kl: 0.010233572831678946
          policy_loss: 0.031246241099304625
          total_loss: 0.019579549299346077
          vf_explained_var: 0.674088716506958
          vf_loss: 0.00015916781679455502
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000
  iterations_since_restore: 571
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,571,13063.7,571000,0,0,0,387.45


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-10-09_18-30-05
  done: false
  episode_len_mean: 387.52
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1462
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.5906337844000922
          entropy_coeff: 0.009999999999999998
          kl: 0.009421177330227218
          policy_loss: -0.07665900509390566
          total_loss: -0.08952858256590035
          vf_explained_var: -0.5993196964263916
          vf_loss: 0.00017184508954718087
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 572000
  iterations_since_restore: 572
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,572,13086.4,572000,0,0,0,387.52


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-10-09_18-30-23
  done: false
  episode_len_mean: 389.01
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1465
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.4794954233699376
          entropy_coeff: 0.009999999999999998
          kl: 0.007318381626914297
          policy_loss: -0.014705407867829005
          total_loss: -0.02705611561735471
          vf_explained_var: -0.9658825993537903
          vf_loss: 0.00021878087750843003
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 573000
  iterations_since_restore: 573
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,573,13105,573000,0,0,0,389.01


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-10-09_18-30-43
  done: false
  episode_len_mean: 390.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1467
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.882386130756802
          entropy_coeff: 0.009999999999999998
          kl: 0.011380221269737194
          policy_loss: -0.008161441733439764
          total_loss: -0.023318451560205882
          vf_explained_var: -0.9448954463005066
          vf_loss: 0.00020620676110007075
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 574000
  iterations_since_restore: 574
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,574,13124.5,574000,0,0,0,390.24


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-10-09_18-31-02
  done: false
  episode_len_mean: 391.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1469
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7808996107843187
          entropy_coeff: 0.009999999999999998
          kl: 0.011522898790848985
          policy_loss: -0.04176984777053197
          total_loss: -0.05587275500098864
          vf_explained_var: 0.46324074268341064
          vf_loss: 0.00020205409681592655
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 575000
  iterations_since_restore: 575
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,575,13143.4,575000,0,0,0,391.55




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-10-09_18-31-40
  done: false
  episode_len_mean: 391.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1471
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8095108879937065
          entropy_coeff: 0.009999999999999998
          kl: 0.009161609906822892
          policy_loss: -0.002741288745568858
          total_loss: -0.01792154349386692
          vf_explained_var: -0.9078553915023804
          vf_loss: 0.00012887279606527754
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 576000
  iterations_since_restore: 576
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,576,13181.5,576000,0,0,0,391.55


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-10-09_18-31-58
  done: false
  episode_len_mean: 393.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1473
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8931437810262044
          entropy_coeff: 0.009999999999999998
          kl: 0.011994279194253683
          policy_loss: -0.07375799732075797
          total_loss: -0.08894659669862853
          vf_explained_var: -0.6443395614624023
          vf_loss: 9.545995477916828e-05
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 577000
  iterations_since_restore: 577
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,577,13199.1,577000,0,0,0,393.68


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-10-09_18-32-16
  done: false
  episode_len_mean: 395.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1475
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9163869818051655
          entropy_coeff: 0.009999999999999998
          kl: 0.013451127887007806
          policy_loss: -0.035058987223439746
          total_loss: -0.05005277039276229
          vf_explained_var: -0.4341140687465668
          vf_loss: 7.969285903123415e-05
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 578000
  iterations_since_restore: 578
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,578,13217.8,578000,0,0,0,395.43


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-10-09_18-32-37
  done: false
  episode_len_mean: 396.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1478
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7241643799675836
          entropy_coeff: 0.009999999999999998
          kl: 0.00879371691473963
          policy_loss: -0.03905981143729554
          total_loss: -0.0535268105359541
          vf_explained_var: 0.07260831445455551
          vf_loss: 0.00010053430321729846
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 579000
  iterations_since_restore: 579
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,579,13238.9,579000,0,0,0,396.82


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-10-09_18-33-01
  done: false
  episode_len_mean: 397.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1481
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.587540324529012
          entropy_coeff: 0.009999999999999998
          kl: 0.005460133487273626
          policy_loss: 0.0063602199157079065
          total_loss: -0.007702702532211939
          vf_explained_var: -0.7639926671981812
          vf_loss: 0.0001520896337751765
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 580000
  iterations_since_restore: 580
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,580,13262.1,580000,0,0,0,397.5


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-10-09_18-33-24
  done: false
  episode_len_mean: 396.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1484
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6936156272888183
          entropy_coeff: 0.009999999999999998
          kl: 0.00882422674594196
          policy_loss: 0.04137305716673533
          total_loss: 0.027317110531859926
          vf_explained_var: -1.0
          vf_loss: 0.00019682206022682497
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 581000
  iterations_since_restore: 581
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,581,13284.9,581000,0,0,0,396.73


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-10-09_18-33-42
  done: false
  episode_len_mean: 398.69
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1486
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9100376036432054
          entropy_coeff: 0.009999999999999998
          kl: 0.013064204883467115
          policy_loss: -0.08973910106966893
          total_loss: -0.10478516103078922
          vf_explained_var: -0.796758770942688
          vf_loss: 8.158384682448943e-05
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 582000
  iterations_since_restore: 582
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,582,13303.4,582000,0,0,0,398.69


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-10-09_18-34-04
  done: false
  episode_len_mean: 398.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1488
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7140396568510268
          entropy_coeff: 0.009999999999999998
          kl: 0.008514477541386532
          policy_loss: -0.0016908417559332318
          total_loss: -0.016123394171396892
          vf_explained_var: -1.0
          vf_loss: 0.00011865057919446069
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 583000
  iterations_since_restore: 583
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,583,13324.9,583000,0,0,0,398.55


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-10-09_18-34-24
  done: false
  episode_len_mean: 399.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1491
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6948585947354635
          entropy_coeff: 0.009999999999999998
          kl: 0.008806030437596648
          policy_loss: -0.023002307779259153
          total_loss: -0.03712347617579831
          vf_explained_var: -1.0
          vf_loss: 0.0001495640086229994
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000
  iterations_since_restore: 584
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,584,13345.6,584000,0,0,0,399.81


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-10-09_18-34-44
  done: false
  episode_len_mean: 399.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1493
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8628423637813991
          entropy_coeff: 0.009999999999999998
          kl: 0.00938976438768738
          policy_loss: 0.004587456811633375
          total_loss: -0.011116907186806201
          vf_explained_var: -1.0
          vf_loss: 6.869842084900786e-05
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 585000
  iterations_since_restore: 585
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,585,13365.4,585000,0,0,0,399.99


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-10-09_18-35-05
  done: false
  episode_len_mean: 399.95
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1495
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7641771541701423
          entropy_coeff: 0.009999999999999998
          kl: 0.007937680605634215
          policy_loss: -0.03853387838850419
          total_loss: -0.053674615919589996
          vf_explained_var: -0.789711058139801
          vf_loss: 8.724096850427385e-05
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 586000
  iterations_since_restore: 586
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,586,13386.6,586000,0,0,0,399.95


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-10-09_18-35-25
  done: false
  episode_len_mean: 399.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1498
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8878992451561822
          entropy_coeff: 0.009999999999999998
          kl: 0.01340493899345664
          policy_loss: -0.03745813605686029
          total_loss: -0.05215510183738337
          vf_explained_var: -1.0
          vf_loss: 0.00010567571759262743
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 587000
  iterations_since_restore: 587
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,587,13405.8,587000,0,0,0,399.42




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-10-09_18-36-05
  done: false
  episode_len_mean: 399.59
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1500
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.842109861638811
          entropy_coeff: 0.009999999999999998
          kl: 0.011307444479254929
          policy_loss: -0.12935532095531624
          total_loss: -0.1441984944873386
          vf_explained_var: -0.7248565554618835
          vf_loss: 0.00013941020693487694
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 588000
  iterations_since_restore: 588
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,588,13445.9,588000,0,0,0,399.59


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-10-09_18-36-27
  done: false
  episode_len_mean: 396.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1503
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8036289241578844
          entropy_coeff: 0.009999999999999998
          kl: 0.017613859268287454
          policy_loss: -0.03025223505165842
          total_loss: -0.04273254540231493
          vf_explained_var: -0.8394441604614258
          vf_loss: 0.00019972586067322278
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 589000
  iterations_since_restore: 589
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,589,13468,589000,0,0,0,396.75


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-10-09_18-36-48
  done: false
  episode_len_mean: 395.97
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1506
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.761054684056176
          entropy_coeff: 0.009999999999999998
          kl: 0.011013433034638274
          policy_loss: -0.017288407766156727
          total_loss: -0.03139306786987517
          vf_explained_var: -0.9938374161720276
          vf_loss: 0.00015678011857542313
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 590000
  iterations_since_restore: 590
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,590,13489.6,590000,0,0,0,395.97


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-10-09_18-37-10
  done: false
  episode_len_mean: 395.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1508
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.711025471157498
          entropy_coeff: 0.009999999999999998
          kl: 0.01038065333397778
          policy_loss: -0.0345836915800141
          total_loss: -0.048361710728042656
          vf_explained_var: -0.8237192630767822
          vf_loss: 0.00017555064397230228
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 591000
  iterations_since_restore: 591
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,591,13511.2,591000,0,0,0,395.36


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-10-09_18-37-30
  done: false
  episode_len_mean: 396.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1511
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6357042498058743
          entropy_coeff: 0.009999999999999998
          kl: 0.010202860087797996
          policy_loss: -0.06599693968892098
          total_loss: -0.07879479920698537
          vf_explained_var: -0.8573732376098633
          vf_loss: 0.00045656782232173204
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 592000
  iterations_since_restore: 592
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,592,13531.6,592000,0,0,0,396.92


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-10-09_18-37-50
  done: false
  episode_len_mean: 398.26
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1513
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.728585085603926
          entropy_coeff: 0.009999999999999998
          kl: 0.009183723281710382
          policy_loss: -0.016740760227872267
          total_loss: -0.0310285196122196
          vf_explained_var: -0.8382988572120667
          vf_loss: 0.00020538649340677591
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 593000
  iterations_since_restore: 593
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,593,13550.9,593000,0,0,0,398.26


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-10-09_18-38-08
  done: false
  episode_len_mean: 400.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1515
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.826596846845415
          entropy_coeff: 0.009999999999999998
          kl: 0.013072910888684557
          policy_loss: -0.08422180103758971
          total_loss: -0.09839228234357304
          vf_explained_var: -0.8783835768699646
          vf_loss: 0.00012010638408052424
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 594000
  iterations_since_restore: 594
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,594,13569.4,594000,0,0,0,400.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-10-09_18-38-29
  done: false
  episode_len_mean: 401.42
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1517
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.808407065603468
          entropy_coeff: 0.009999999999999998
          kl: 0.008757031519306
          policy_loss: -0.019793038215074273
          total_loss: -0.03508594857735766
          vf_explained_var: -0.803881824016571
          vf_loss: 0.00012820746002641196
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 595000
  iterations_since_restore: 595
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,595,13589.9,595000,0,0,0,401.42


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-10-09_18-38-52
  done: false
  episode_len_mean: 400.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1520
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7755463931295608
          entropy_coeff: 0.009999999999999998
          kl: 0.009235729171259206
          policy_loss: -0.04433804266154766
          total_loss: -0.059149386112888655
          vf_explained_var: -0.6808264255523682
          vf_loss: 0.00013559875418044006
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 596000
  iterations_since_restore: 596
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,596,13613.5,596000,0,0,0,400.09


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-10-09_18-39-14
  done: false
  episode_len_mean: 400.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1523
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8364399909973144
          entropy_coeff: 0.009999999999999998
          kl: 0.010336719750484096
          policy_loss: -0.0538011423829529
          total_loss: -0.06891399253573682
          vf_explained_var: -0.9412980675697327
          vf_loss: 0.00010822490896518704
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 597000
  iterations_since_restore: 597
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,597,13635.3,597000,0,0,0,400.04


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-10-09_18-39-39
  done: false
  episode_len_mean: 398.31
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1525
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7780751321050856
          entropy_coeff: 0.009999999999999998
          kl: 0.007172698604782523
          policy_loss: -0.02187675181776285
          total_loss: -0.037373629579734474
          vf_explained_var: -0.9926095604896545
          vf_loss: 0.00010270385444325964
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000
  iterations_since_restore: 598
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,598,13660.3,598000,0,0,0,398.31


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-10-09_18-40-03
  done: false
  episode_len_mean: 398.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1528
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.869541969564226
          entropy_coeff: 0.009999999999999998
          kl: 0.008675191374117652
          policy_loss: -0.10100384213858181
          total_loss: -0.11697159277068245
          vf_explained_var: -1.0
          vf_loss: 8.960365602332685e-05
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 599000
  iterations_since_restore: 599
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,599,13683.9,599000,0,0,0,398.45




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-10-09_18-40-44
  done: false
  episode_len_mean: 397.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1531
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7371543208758036
          entropy_coeff: 0.009999999999999998
          kl: 0.009501999325453427
          policy_loss: -0.01775085073378351
          total_loss: -0.032050532102584836
          vf_explained_var: -0.8425804972648621
          vf_loss: 0.0001823706018816059
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600000
  iterations_since_restore: 600
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,600,13724.6,600000,0,0,0,397.81


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-10-09_18-41-03
  done: false
  episode_len_mean: 399.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1533
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9105999681684707
          entropy_coeff: 0.009999999999999998
          kl: 0.011154511323521948
          policy_loss: -0.07610616638428636
          total_loss: -0.09171840581629011
          vf_explained_var: -1.0
          vf_loss: 0.00010175252613812013
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 601000
  iterations_since_restore: 601
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,601,13744,601000,0,0,0,399.5


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-10-09_18-41-26
  done: false
  episode_len_mean: 400.19
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1536
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6746617939737107
          entropy_coeff: 0.009999999999999998
          kl: 0.01168503504606592
          policy_loss: 0.03748427174157566
          total_loss: 0.024377393474181493
          vf_explained_var: -1.0
          vf_loss: 8.640496319761345e-05
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 602000
  iterations_since_restore: 602
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,602,13766.8,602000,0,0,0,400.19


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-10-09_18-41-49
  done: false
  episode_len_mean: 399.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1539
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.711650452348921
          entropy_coeff: 0.009999999999999998
          kl: 0.010775633811311122
          policy_loss: -0.044062719059487186
          total_loss: -0.057791418727073406
          vf_explained_var: -1.0
          vf_loss: 0.00011101317423809734
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 603000
  iterations_since_restore: 603
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,603,13790.3,603000,0,0,0,399.25


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-10-09_18-42-13
  done: false
  episode_len_mean: 399.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1542
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7949948032697043
          entropy_coeff: 0.009999999999999998
          kl: 0.009235830788895328
          policy_loss: -0.04817543021506733
          total_loss: -0.0632435590442684
          vf_explained_var: -1.0
          vf_loss: 7.32689197320724e-05
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 604000
  iterations_since_restore: 604
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,604,13814.2,604000,0,0,0,399.68


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-10-09_18-42-37
  done: false
  episode_len_mean: 399.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1544
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7652578698264227
          entropy_coeff: 0.009999999999999998
          kl: 0.010568890202098145
          policy_loss: -0.012426255705455939
          total_loss: -0.02675252455390162
          vf_explained_var: -1.0
          vf_loss: 0.00011238600822657139
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 605000
  iterations_since_restore: 605
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,605,13838,605000,0,0,0,399.66


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-10-09_18-43-04
  done: false
  episode_len_mean: 397.92
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1548
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7298839304182265
          entropy_coeff: 0.009999999999999998
          kl: 0.009972340913947894
          policy_loss: -0.0016164982277486058
          total_loss: -0.01582144753386577
          vf_explained_var: -1.0
          vf_loss: 6.136892641532339e-05
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 606000
  iterations_since_restore: 606
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,606,13864.7,606000,0,0,0,397.92


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-10-09_18-43-29
  done: false
  episode_len_mean: 396.55
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1550
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8803984562555949
          entropy_coeff: 0.009999999999999998
          kl: 0.011395442673741327
          policy_loss: -0.028911850684218938
          total_loss: -0.04415278145008617
          vf_explained_var: -0.34938645362854004
          vf_loss: 9.777917879950514e-05
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 607000
  iterations_since_restore: 607
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,607,13889.7,607000,0,0,0,396.55


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-10-09_18-43-53
  done: false
  episode_len_mean: 395.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1553
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7785817212528652
          entropy_coeff: 0.009999999999999998
          kl: 0.006491973155749382
          policy_loss: 0.018034598603844643
          total_loss: 0.0023108429378933377
          vf_explained_var: -0.40164974331855774
          vf_loss: 8.789719051694394e-05
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 608000
  iterations_since_restore: 608
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,608,13913.9,608000,0,0,0,395.1


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-10-09_18-44-18
  done: false
  episode_len_mean: 392.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1556
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.5963063756624858
          entropy_coeff: 0.009999999999999998
          kl: 0.006955938450779245
          policy_loss: -0.08295053260193931
          total_loss: -0.09659700372980701
          vf_explained_var: -0.7278886437416077
          vf_loss: 0.00020134104060060863
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 609000
  iterations_since_restore: 609
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,609,13938.7,609000,0,0,0,392.34


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-10-09_18-44-40
  done: false
  episode_len_mean: 393.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1559
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8403609633445739
          entropy_coeff: 0.009999999999999998
          kl: 0.010674837577546784
          policy_loss: -0.05888792640633053
          total_loss: -0.07398102593918642
          vf_explained_var: -0.9956801533699036
          vf_loss: 6.43656656595542e-05
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 610000
  iterations_since_restore: 610
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,610,13961.3,610000,0,0,0,393.1




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-10-09_18-45-25
  done: false
  episode_len_mean: 391.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1562
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6417025089263917
          entropy_coeff: 0.009999999999999998
          kl: 0.01077348714556826
          policy_loss: -0.01013812203374174
          total_loss: -0.0232033914162053
          vf_explained_var: -0.936432421207428
          vf_loss: 7.561314131534244e-05
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 611000
  iterations_since_restore: 611
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,611,14005.6,611000,0,0,0,391.76


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-10-09_18-45-50
  done: false
  episode_len_mean: 386.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1565
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.608531822098626
          entropy_coeff: 0.009999999999999998
          kl: 0.009689327958421146
          policy_loss: -0.018980151828792362
          total_loss: -0.031958529187573324
          vf_explained_var: -0.6594614386558533
          vf_loss: 0.00016048125366473364
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 612000
  iterations_since_restore: 612
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,612,14030.6,612000,0,0,0,386.47


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-10-09_18-46-18
  done: false
  episode_len_mean: 381.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1569
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.4891714003351
          entropy_coeff: 0.009999999999999998
          kl: 0.010556822781488269
          policy_loss: -0.017310114546368518
          total_loss: -0.028887184150516988
          vf_explained_var: -0.44563013315200806
          vf_loss: 0.00010438694456145943
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 613000
  iterations_since_restore: 613
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,613,14058.3,613000,0,0,0,381.29


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-10-09_18-46-45
  done: false
  episode_len_mean: 378.27
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1572
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.5574216193623014
          entropy_coeff: 0.009999999999999998
          kl: 0.010746995360870986
          policy_loss: -0.0874287953807248
          total_loss: -0.09955073408782482
          vf_explained_var: -0.4864913821220398
          vf_loss: 0.00018419071323781585
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 614000
  iterations_since_restore: 614
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,614,14086,614000,0,0,0,378.27


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-10-09_18-47-11
  done: false
  episode_len_mean: 372.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1575
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.4883401248190138
          entropy_coeff: 0.009999999999999998
          kl: 0.008294760147039821
          policy_loss: -0.06059948081771533
          total_loss: -0.07276062741875648
          vf_explained_var: -0.9773719906806946
          vf_loss: 0.00019987703740803733
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000
  iterations_since_restore: 615
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,615,14112.2,615000,0,0,0,372.65


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-10-09_18-47-37
  done: false
  episode_len_mean: 369.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1578
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.4305045154359606
          entropy_coeff: 0.009999999999999998
          kl: 0.00973863838795322
          policy_loss: -0.08652499674095047
          total_loss: -0.09768950169285139
          vf_explained_var: -0.849452018737793
          vf_loss: 0.00017908700909982953
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000
  iterations_since_restore: 616
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,616,14138.1,616000,0,0,0,369.88


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-10-09_18-48-05
  done: false
  episode_len_mean: 367.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1582
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.4538627558284336
          entropy_coeff: 0.009999999999999998
          kl: 0.008188454632612925
          policy_loss: -0.030296713031000563
          total_loss: -0.04223529133531782
          vf_explained_var: -0.49836036562919617
          vf_loss: 0.00010999535726215173
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 617000
  iterations_since_restore: 617
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,617,14165.8,617000,0,0,0,367.32


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-10-09_18-48-34
  done: false
  episode_len_mean: 362.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1585
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.3432129184405008
          entropy_coeff: 0.009999999999999998
          kl: 0.0043256703855828645
          policy_loss: -0.005320023828082615
          total_loss: -0.017309594402710595
          vf_explained_var: -0.2544165551662445
          vf_loss: 0.00012715234075181392
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 618000
  iterations_since_restore: 618
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,618,14194.2,618000,0,0,0,362.82


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-10-09_18-49-05
  done: false
  episode_len_mean: 357.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1589
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 1.3012219468752544
          entropy_coeff: 0.009999999999999998
          kl: 0.004281887538137299
          policy_loss: -0.022388462639517253
          total_loss: -0.034631645493209365
          vf_explained_var: -0.7444595098495483
          vf_loss: 0.00011799041013647285
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 619000
  iterations_since_restore: 619
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,619,14225.3,619000,0,0,0,357.88




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-10-09_18-49-51
  done: false
  episode_len_mean: 352.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1592
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.4891355011198255
          entropy_coeff: 0.009999999999999998
          kl: 0.0163832868957141
          policy_loss: 0.035379119714101154
          total_loss: 0.02188757848408487
          vf_explained_var: -0.00011508663737913594
          vf_loss: 0.0001543030831170553
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000
  iterations_since_restore: 620
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,620,14271.5,620000,0,0,0,352.37


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-10-09_18-50-20
  done: false
  episode_len_mean: 347.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1596
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.4069956554306877
          entropy_coeff: 0.009999999999999998
          kl: 0.012940669078007025
          policy_loss: -0.07571395403809017
          total_loss: -0.08867906199561225
          vf_explained_var: -0.6677120923995972
          vf_loss: 0.00012105500540605539
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 621000
  iterations_since_restore: 621
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,621,14300.4,621000,0,0,0,347.9


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-10-09_18-50-45
  done: false
  episode_len_mean: 344.25
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1599
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.3815520193841722
          entropy_coeff: 0.009999999999999998
          kl: 0.01187512803822997
          policy_loss: 0.01071446215113004
          total_loss: -0.0020445007416937085
          vf_explained_var: -0.9862143397331238
          vf_loss: 0.0001537711620686524
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000
  iterations_since_restore: 622
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,622,14326.1,622000,0,0,0,344.25


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-10-09_18-51-11
  done: false
  episode_len_mean: 343.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1602
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.4737687971856859
          entropy_coeff: 0.009999999999999998
          kl: 0.011102270417989443
          policy_loss: -0.07297731182641454
          total_loss: -0.08679705758889517
          vf_explained_var: -0.9193966388702393
          vf_loss: 7.391049245294804e-05
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000
  iterations_since_restore: 623
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,623,14351.5,623000,0,0,0,343.28


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-10-09_18-51-36
  done: false
  episode_len_mean: 341.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1605
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.429443249437544
          entropy_coeff: 0.009999999999999998
          kl: 0.01229740220481664
          policy_loss: -0.07009209659364489
          total_loss: -0.08328825686540868
          vf_explained_var: -1.0
          vf_loss: 0.00016338448416111835
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 624000
  iterations_since_restore: 624
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,624,14377,624000,0,0,0,341.77


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-10-09_18-52-06
  done: false
  episode_len_mean: 337.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1609
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.4738904648356967
          entropy_coeff: 0.009999999999999998
          kl: 0.013858202333307092
          policy_loss: 0.07096607403622733
          total_loss: 0.05797781352367666
          vf_explained_var: 0.1282629668712616
          vf_loss: 0.0006970988538847046
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 625000
  iterations_since_restore: 625
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,625,14406.9,625000,0,0,0,337.87


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-10-09_18-52-34
  done: false
  episode_len_mean: 333.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1612
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.5033999231126574
          entropy_coeff: 0.009999999999999998
          kl: 0.011837618920158378
          policy_loss: -0.05333304697026809
          total_loss: -0.06738757830527094
          vf_explained_var: -0.9244667291641235
          vf_loss: 7.953371653760163e-05
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 626000
  iterations_since_restore: 626
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,626,14434.4,626000,0,0,0,333.29


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-10-09_18-53-02
  done: false
  episode_len_mean: 326.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1616
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.3843620194329156
          entropy_coeff: 0.009999999999999998
          kl: 0.016090137408347104
          policy_loss: -0.06535233217808935
          total_loss: -0.07786351963877677
          vf_explained_var: -0.40746238827705383
          vf_loss: 0.00010920604238183135
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000
  iterations_since_restore: 627
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,627,14462.9,627000,0,0,0,326.28


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-10-09_18-53-30
  done: false
  episode_len_mean: 323.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1619
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.4325194054179722
          entropy_coeff: 0.009999999999999998
          kl: 0.013108925730382422
          policy_loss: -0.09160297840005821
          total_loss: -0.10482891307522853
          vf_explained_var: -0.2799719572067261
          vf_loss: 0.0001026753081128441
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 628000
  iterations_since_restore: 628
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,628,14490.4,628000,0,0,0,323.54




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-10-09_18-54-15
  done: false
  episode_len_mean: 318.93
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1623
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 1.2157660762468974
          entropy_coeff: 0.009999999999999998
          kl: 0.020417550916144556
          policy_loss: -0.07469871408409542
          total_loss: -0.08512012557023102
          vf_explained_var: 0.4207146167755127
          vf_loss: 0.00018403816094279238
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000
  iterations_since_restore: 629
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,629,14535.9,629000,0,0,0,318.93


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-10-09_18-54-42
  done: false
  episode_len_mean: 317.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1626
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 1.479263218243917
          entropy_coeff: 0.009999999999999998
          kl: 0.01623654363409036
          policy_loss: -0.019783060273362532
          total_loss: -0.03259086461944712
          vf_explained_var: -0.6050949692726135
          vf_loss: 0.00013329602499854648
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 630000
  iterations_since_restore: 630
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,630,14562,630000,0,0,0,317.37


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-10-09_18-55-09
  done: false
  episode_len_mean: 314.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1630
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 1.3565082046720716
          entropy_coeff: 0.009999999999999998
          kl: 0.012424923652271817
          policy_loss: -0.02145263109770086
          total_loss: -0.03349621084829171
          vf_explained_var: -0.582496166229248
          vf_loss: 0.00010462551233811408
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631000
  iterations_since_restore: 631
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,631,14589.2,631000,0,0,0,314.75


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-10-09_18-55-34
  done: false
  episode_len_mean: 312.58
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1633
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 1.7370899187193976
          entropy_coeff: 0.009999999999999998
          kl: 0.0123476072461459
          policy_loss: 0.0014956391313009792
          total_loss: -0.014309815855489837
          vf_explained_var: -0.362280011177063
          vf_loss: 0.0001573865205702734
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000
  iterations_since_restore: 632
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,632,14614.6,632000,0,0,0,312.58


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-10-09_18-56-02
  done: false
  episode_len_mean: 309.38
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1636
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 1.6154872006840175
          entropy_coeff: 0.009999999999999998
          kl: 0.01783916491614546
          policy_loss: 0.01900504893726773
          total_loss: 0.005012137360042996
          vf_explained_var: 0.006365272682160139
          vf_loss: 0.00012767103398800828
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000
  iterations_since_restore: 633
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,633,14642.1,633000,0,0,0,309.38


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-10-09_18-56-26
  done: false
  episode_len_mean: 308.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1639
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 1.787611711025238
          entropy_coeff: 0.009999999999999998
          kl: 0.01782135921923352
          policy_loss: -0.04935743328597811
          total_loss: -0.06507380329486397
          vf_explained_var: -0.4998512268066406
          vf_loss: 0.00012749046759886874
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 634000
  iterations_since_restore: 634
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,634,14666.5,634000,0,0,0,308.45


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-10-09_18-56-52
  done: false
  episode_len_mean: 307.63
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1642
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 1.6243395646413168
          entropy_coeff: 0.009999999999999998
          kl: 0.010425181277833871
          policy_loss: -0.03018711705485152
          total_loss: -0.045114668148259325
          vf_explained_var: -1.0
          vf_loss: 0.00012701114303328924
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000
  iterations_since_restore: 635
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,635,14692,635000,0,0,0,307.63


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-10-09_18-57-18
  done: false
  episode_len_mean: 306.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 1646
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 1.4813420295715332
          entropy_coeff: 0.009999999999999998
          kl: 0.01386745455960489
          policy_loss: -0.020373688058720696
          total_loss: -0.033485279252959625
          vf_explained_var: -0.5931133031845093
          vf_loss: 0.00012045595544299836
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000
  iterations_since_restore: 636
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,636,14718.1,636000,0,0,0,306.11


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-10-09_18-57-44
  done: false
  episode_len_mean: 306.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1649
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 1.449593683083852
          entropy_coeff: 0.009999999999999998
          kl: 0.013605608342966806
          policy_loss: 0.0355719936804639
          total_loss: 0.022714842648969755
          vf_explained_var: 0.17978373169898987
          vf_loss: 8.727124877623282e-05
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000
  iterations_since_restore: 637
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,637,14744.3,637000,0,0,0,306




Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-10-09_18-58-25
  done: false
  episode_len_mean: 306.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1652
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 1.7466045008765327
          entropy_coeff: 0.009999999999999998
          kl: 0.02128897396047471
          policy_loss: 0.020504464436736373
          total_loss: 0.0057088771628008945
          vf_explained_var: -0.5471435785293579
          vf_loss: 0.00024277255175143687
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000
  iterations_since_restore: 638
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,638,14785.3,638000,0,0,0,306.03


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-10-09_18-58-46
  done: false
  episode_len_mean: 305.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1654
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.7985235585106745
          entropy_coeff: 0.009999999999999998
          kl: 0.013978570738141164
          policy_loss: -0.06066268293393983
          total_loss: -0.07611013129353524
          vf_explained_var: 0.3897990882396698
          vf_loss: 0.00014671902778597238
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639000
  iterations_since_restore: 639
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,639,14806.3,639000,0,0,0,305.84


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-10-09_18-59-05
  done: false
  episode_len_mean: 308.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1657
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.856855775250329
          entropy_coeff: 0.009999999999999998
          kl: 0.01604116674915282
          policy_loss: 0.0016537024743027158
          total_loss: -0.014056532747215696
          vf_explained_var: 0.06404173374176025
          vf_loss: 0.00011444391525906717
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000
  iterations_since_restore: 640
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,640,14825.6,640000,0,0,0,308


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-10-09_18-59-28
  done: false
  episode_len_mean: 308.32
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1660
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.9263122571839226
          entropy_coeff: 0.009999999999999998
          kl: 0.014330314593537471
          policy_loss: -0.011664589618643124
          total_loss: -0.0284130042211877
          vf_explained_var: -0.016929317265748978
          vf_loss: 6.347225369406968e-05
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 641000
  iterations_since_restore: 641
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,641,14847.9,641000,0,0,0,308.32


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-10-09_18-59-49
  done: false
  episode_len_mean: 308.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1662
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.3463653610812294
          entropy_coeff: 0.009999999999999998
          kl: 0.010397371679199354
          policy_loss: -0.0475613996386528
          total_loss: -0.05911751091480255
          vf_explained_var: -0.2251402735710144
          vf_loss: 0.0001290480681038591
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 642000
  iterations_since_restore: 642
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,642,14869.2,642000,0,0,0,308.86


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-10-09_19-00-10
  done: false
  episode_len_mean: 311.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1665
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.7064593765470717
          entropy_coeff: 0.009999999999999998
          kl: 0.018536453255483663
          policy_loss: -0.07502516441875035
          total_loss: -0.08880617469549179
          vf_explained_var: -0.5425310134887695
          vf_loss: 0.00011287722166647048
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 643000
  iterations_since_restore: 643
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,643,14890.2,643000,0,0,0,311.74


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-10-09_19-00-29
  done: false
  episode_len_mean: 312.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1667
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.7699115263091194
          entropy_coeff: 0.009999999999999998
          kl: 0.01181873446512455
          policy_loss: -0.01294419773750835
          total_loss: -0.028494661011629636
          vf_explained_var: -0.146320179104805
          vf_loss: 0.0001270280105826613
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 644000
  iterations_since_restore: 644
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,644,14909.2,644000,0,0,0,312.61


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-10-09_19-00-50
  done: false
  episode_len_mean: 315.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1670
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.6333085417747497
          entropy_coeff: 0.009999999999999998
          kl: 0.01436233003035849
          policy_loss: 0.010213574384235674
          total_loss: -0.0033717055287626053
          vf_explained_var: -0.4183010458946228
          vf_loss: 0.00029109699810053117
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 645000
  iterations_since_restore: 645
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,645,14930.1,645000,0,0,0,315.54


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-10-09_19-01-09
  done: false
  episode_len_mean: 317.49
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1672
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.7837755587365893
          entropy_coeff: 0.009999999999999998
          kl: 0.014142176796808608
          policy_loss: -0.04324775189161301
          total_loss: -0.05852588957382573
          vf_explained_var: -0.12044397741556168
          vf_loss: 0.0001405654453265015
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 646000
  iterations_since_restore: 646
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,646,14949.1,646000,0,0,0,317.49


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-10-09_19-01-28
  done: false
  episode_len_mean: 321.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 1675
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.6703581796752083
          entropy_coeff: 0.009999999999999998
          kl: 0.012038912760173975
          policy_loss: -0.06935657180017896
          total_loss: -0.08384120729234483
          vf_explained_var: -0.18715892732143402
          vf_loss: 0.0001596636047502721
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 647000
  iterations_since_restore: 647
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,647,14968.4,647000,0,0,0,321.84


Result for PPO_my_env_67f57_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-10-09_19-01-47
  done: false
  episode_len_mean: 324.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 1677
  experiment_id: 9188d825d2554895b0ca0fd85ffa2a7a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1710523009300232
          cur_lr: 5.000000000000001e-05
          entropy: 1.7168709794680277
          entropy_coeff: 0.009999999999999998
          kl: 0.009395367397608743
          policy_loss: -0.06250214498076175
          total_loss: -0.07788888747907347
          vf_explained_var: -0.48133841156959534
          vf_loss: 0.00017487067404241922
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 648000
  iterations_since_restore: 648
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_67f57_00000,RUNNING,192.168.3.5:202262,648,14987.5,648000,0,0,0,324
