In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

from models import VisualEncoder
from train import *



In [2]:
class MyModelClass(TorchModelV2, nn.Module):
    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512
        self.encoder = VisualEncoder(features_dim)
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AtariCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        self.last_value = None
        
        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()
        
    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        obs = input_dict['obs'].permute(0, 3, 1, 2).float() / 255.0
        if self.use_cuda:
            obs.cuda()
            
        features = self.encoder(obs)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state
    
    @override(TorchModelV2)
    def value_function(self):
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [3]:
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [4]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

def env_creator(env_config):
    env = gym.make('IGLUSilentBuilder-v0', max_steps=1000)
    env.update_taskset(TaskSet(preset=['C17']))
    env = PovOnlyWrapper(env)
    env = IgluActionWrapper(env)
    return env

from ray.tune.registry import register_env
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [5]:
from ray.tune.integration.wandb import WandbLogger

tune.run(PPOTrainer, 
         config={
             "env": "my_env", 
             "framework": "torch",
             "num_gpus": 1,
             "num_workers": 1,
             "sgd_minibatch_size": 256,
             "clip_param": 0.2,
             "entropy_coeff": 0.01,
             "lambda": 0.95,
             "train_batch_size": 1000,
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name": "PPO C17 pretrained"
                  }
              }

        },
        loggers=[WandbLogger])



Trial name,status,loc
PPO_my_env_82e40_00000,PENDING,


2021-09-18 10:02:20,060	INFO wandb.py:170 -- Already logged into W&B.
2021-09-18 10:02:20,070	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)


[2m[36m(pid=35494)[0m 2021-09-18 10:02:23,575	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=35494)[0m 2021-09-18 10:02:23,575	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-09-18_10-03-29
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -2.0
  episode_reward_mean: -2.0
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3492794672648112
          entropy_coeff: 0.009999999999999998
          kl: 0.012402718118078582
          policy_loss: -0.025099800527095796
          total_loss: 0.01665419919623269
          vf_explained_var: 0.426866352558136
          vf_loss: 0.05276625651038355
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,1,60.3318,1000,-2,-2,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-09-18_10-03-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.0
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5049749784999424
          entropy_coeff: 0.009999999999999998
          kl: 0.009203416569786791
          policy_loss: -0.04313008727298843
          total_loss: -0.04535575947827763
          vf_explained_var: 0.14363862574100494
          vf_loss: 0.010983393232648572
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,2,70.4457,2000,-1,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-09-18_10-03-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6666666666666666
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.11498308579127
          entropy_coeff: 0.009999999999999998
          kl: 0.0170997831035636
          policy_loss: -0.01669652871787548
          total_loss: -0.020949005087216695
          vf_explained_var: -0.019618937745690346
          vf_loss: 0.013477399971129166
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,3,79.9512,3000,-0.666667,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-09-18_10-04-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 4
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9144049260351392
          entropy_coeff: 0.009999999999999998
          kl: 0.010913096190881235
          policy_loss: 0.09790919232699606
          total_loss: 0.08772630592187246
          vf_explained_var: 0.19994424283504486
          vf_loss: 0.006778542905683733
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,4,93.026,4000,-0.5,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-09-18_10-04-10
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 5
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1709410429000853
          entropy_coeff: 0.009999999999999998
          kl: 0.009136174667206376
          policy_loss: 0.18994184368186526
          total_loss: 0.17185809537768365
          vf_explained_var: -0.13642852008342743
          vf_loss: 0.0017984241939201537
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,5,101.153,5000,-0.4,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-09-18_10-04-19
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3826948483784993
          entropy_coeff: 0.009999999999999998
          kl: 0.010733825029628758
          policy_loss: -0.04424335988652375
          total_loss: -0.05943734370585945
          vf_explained_var: -0.3178164064884186
          vf_loss: 0.006486203524077104
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,6,110.002,6000,-0.333333,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-09-18_10-04-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2857142857142857
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 7
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3844860752423602
          entropy_coeff: 0.009999999999999998
          kl: 0.013298341637684816
          policy_loss: 0.021755423479610018
          total_loss: 0.0026361430684725446
          vf_explained_var: -0.23994502425193787
          vf_loss: 0.0020659138716938386
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,7,118.725,7000,-0.285714,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-09-18_10-04-38
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 8
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.284607442220052
          entropy_coeff: 0.009999999999999998
          kl: 0.01539579000363403
          policy_loss: -0.019971351077159246
          total_loss: -0.03671116564008925
          vf_explained_var: 0.036355022341012955
          vf_loss: 0.0030271016953823467
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,8,128.639,8000,-0.25,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-09-18_10-04-48
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2222222222222222
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 9
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0337217887242636
          entropy_coeff: 0.009999999999999998
          kl: 0.010666946317417105
          policy_loss: 0.021765688558419545
          total_loss: 0.006169976045687993
          vf_explained_var: -0.07091762125492096
          vf_loss: 0.0026081173059840997
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,9,138.546,9000,-0.222222,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-09-18_10-04-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 10
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.500775835249159
          entropy_coeff: 0.009999999999999998
          kl: 0.017261451856591916
          policy_loss: -0.012421180307865144
          total_loss: -0.031928955101304585
          vf_explained_var: -0.9992537498474121
          vf_loss: 0.002047693588408745
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,10,148.479,10000,-0.2,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-09-18_10-05-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18181818181818182
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 11
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.28419560458925
          entropy_coeff: 0.009999999999999998
          kl: 0.009038869010951661
          policy_loss: -0.11170045302973854
          total_loss: -0.1308594412687752
          vf_explained_var: -0.3595516085624695
          vf_loss: 0.0018751916039036586
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,11,158.143,11000,-0.181818,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-09-18_10-05-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16666666666666666
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 12
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.078777007261912
          entropy_coeff: 0.009999999999999998
          kl: 0.008066301129373739
          policy_loss: -0.03558609129654037
          total_loss: -0.052876170145140754
          vf_explained_var: -1.0
          vf_loss: 0.0018844302801880985
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,12,168.367,12000,-0.166667,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-09-18_10-05-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15384615384615385
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 13
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1653157856729295
          entropy_coeff: 0.009999999999999998
          kl: 0.014326294104446507
          policy_loss: -0.06463646673493915
          total_loss: -0.07067967785729302
          vf_explained_var: 0.046967729926109314
          vf_loss: 0.012744685273436416
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,13,177.426,13000,-0.153846,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-09-18_10-05-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14285714285714285
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 14
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0490508476893106
          entropy_coeff: 0.009999999999999998
          kl: 0.009795819833096549
          policy_loss: 0.013467979513936572
          total_loss: -0.0033467012974951003
          vf_explained_var: -0.581901490688324
          vf_loss: 0.0017166610710167636
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,14,187.491,14000,-0.142857,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-09-18_10-05-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13333333333333333
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 15
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1211488551563686
          entropy_coeff: 0.009999999999999998
          kl: 0.01362459821398226
          policy_loss: 0.06957134703795115
          total_loss: 0.052586839637822576
          vf_explained_var: -0.7435972690582275
          vf_loss: 0.0015020610984518297
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,15,197.782,15000,-0.133333,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-09-18_10-05-57
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.125
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 16
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2260145505269366
          entropy_coeff: 0.009999999999999998
          kl: 0.010386593992773177
          policy_loss: -0.03330863792863157
          total_loss: -0.049439006133211984
          vf_explained_var: -0.7347405552864075
          vf_loss: 0.004052457574936044
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,16,207.757,16000,-0.125,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-09-18_10-06-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.11764705882352941
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 17
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.115039261182149
          entropy_coeff: 0.009999999999999998
          kl: 0.014789994586797306
          policy_loss: 0.016896752868261602
          total_loss: 0.0018976710529790984
          vf_explained_var: -0.4274230897426605
          vf_loss: 0.0031933141658858706
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,17,217.746,17000,-0.117647,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-09-18_10-06-17
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1111111111111111
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 18
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.257981130811903
          entropy_coeff: 0.009999999999999998
          kl: 0.00978872158945647
          policy_loss: -0.09547640590204133
          total_loss: -0.1068228580057621
          vf_explained_var: 0.4405584931373596
          vf_loss: 0.009275616151798102
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,18,227.458,18000,-0.111111,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-09-18_10-06-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.10526315789473684
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 19
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.139578527874417
          entropy_coeff: 0.009999999999999998
          kl: 0.01137547968255323
          policy_loss: -0.0439227856695652
          total_loss: -0.06073622860842281
          vf_explained_var: -0.7091367840766907
          vf_loss: 0.002307245146625468
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,19,237.553,19000,-0.105263,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-09-18_10-06-35
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 20
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2890706830554537
          entropy_coeff: 0.009999999999999998
          kl: 0.009474833195367162
          policy_loss: -0.08529599778768089
          total_loss: -0.103316220579048
          vf_explained_var: -0.22962182760238647
          vf_loss: 0.0029755147736674798
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,20,245.822,20000,-0.1,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-09-18_10-06-44
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09523809523809523
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 21
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2092301302485997
          entropy_coeff: 0.009999999999999998
          kl: 0.021165039701802505
          policy_loss: -0.08358113231758277
          total_loss: -0.08915461831622654
          vf_explained_var: 0.49158337712287903
          vf_loss: 0.012285808041633572
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,21,254.823,21000,-0.0952381,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-09-18_10-06-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.09090909090909091
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 22
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.500310116344028
          entropy_coeff: 0.009999999999999998
          kl: 0.009462584053949074
          policy_loss: 0.023176496517327096
          total_loss: 0.003612618034498559
          vf_explained_var: -0.07403869926929474
          vf_loss: 0.002600449018549019
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,22,263.444,22000,-0.0909091,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-09-18_10-07-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08695652173913043
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.606561716397603
          entropy_coeff: 0.009999999999999998
          kl: 0.001634640364166076
          policy_loss: -0.022896224829471772
          total_loss: -0.0483690562347571
          vf_explained_var: 0.7077088952064514
          vf_loss: 0.0001023934458417999
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,23,273.356,23000,-0.0869565,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-09-18_10-07-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08333333333333333
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 24
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.0317751513587106
          entropy_coeff: 0.009999999999999998
          kl: 0.016389916724409165
          policy_loss: -0.10435375591946973
          total_loss: -0.12068382038010492
          vf_explained_var: -0.6981037855148315
          vf_loss: 0.001529201044791585
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,24,283.641,24000,-0.0833333,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-09-18_10-07-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 25
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.108827859825558
          entropy_coeff: 0.009999999999999998
          kl: 0.012782489632911023
          policy_loss: 0.04386012309955226
          total_loss: 0.029232029451264276
          vf_explained_var: -0.1897844672203064
          vf_loss: 0.004542810978212704
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  node_ip: 192.168.3.5
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,25,292.993,25000,-0.08,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-09-18_10-07-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07692307692307693
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 26
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.196173922220866
          entropy_coeff: 0.009999999999999998
          kl: 0.015051619816734125
          policy_loss: -0.17696228068735864
          total_loss: -0.194289730137421
          vf_explained_var: -0.043487828224897385
          vf_loss: 0.0023765413384858724
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,26,302.703,26000,-0.0769231,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-09-18_10-07-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07407407407407407
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 27
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2832618872324626
          entropy_coeff: 0.009999999999999998
          kl: 0.013483498383594038
          policy_loss: -0.041976318839523526
          total_loss: -0.05888974852859974
          vf_explained_var: -1.0
          vf_loss: 0.003896665058305694
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27
  node_ip: 192.168.3.5
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,27,312.934,27000,-0.0740741,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-09-18_10-07-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07142857142857142
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.340794046719869
          entropy_coeff: 0.009999999999999998
          kl: 0.0132446583163913
          policy_loss: -0.06778586361971166
          total_loss: -0.0855235359734959
          vf_explained_var: -0.7176200151443481
          vf_loss: 0.003683569923871093
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,28,323.033,28000,-0.0714286,0,-2,1000


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-09-18_10-08-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06896551724137931
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 29
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.28643192715115
          entropy_coeff: 0.009999999999999998
          kl: 0.012511591386425132
          policy_loss: -0.05006254739645455
          total_loss: -0.0692457476630807
          vf_explained_var: -0.878873884677887
          vf_loss: 0.0018043770648849507
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,29,333.14,29000,-0.0689655,0,-2,1000




Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-09-18_10-08-30
  done: false
  episode_len_mean: 996.0666666666667
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06666666666666667
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 30
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3590837717056274
          entropy_coeff: 0.009999999999999998
          kl: 0.008589659497914601
          policy_loss: 0.03383567391170396
          total_loss: 0.01238038121826119
          vf_explained_var: -1.0
          vf_loss: 0.000847095013240404
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,30,360.38,30000,-0.0666667,0,-2,996.067


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-09-18_10-08-42
  done: false
  episode_len_mean: 996.1935483870968
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06451612903225806
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 31
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3378247923321194
          entropy_coeff: 0.009999999999999998
          kl: 0.008482138581811455
          policy_loss: -0.04345305444051822
          total_loss: -0.06497596142192681
          vf_explained_var: -1.0
          vf_loss: 0.0005830192055630808
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore: 31
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,31,372.092,31000,-0.0645161,0,-2,996.194


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-09-18_10-08-52
  done: false
  episode_len_mean: 996.3125
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.0625
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 32
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.308473587036133
          entropy_coeff: 0.009999999999999998
          kl: 0.010744570184025942
          policy_loss: -0.07316086250874732
          total_loss: -0.09289086138208708
          vf_explained_var: -1.0
          vf_loss: 0.001743050627475087
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
  node_ip: 192.168.3.5
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,32,382.344,32000,-0.0625,0,-2,996.312


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-09-18_10-09-02
  done: false
  episode_len_mean: 996.4242424242424
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06060606060606061
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 33
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2830215374628704
          entropy_coeff: 0.009999999999999998
          kl: 0.012350879362846139
          policy_loss: -0.11028787520610624
          total_loss: -0.12972198190788428
          vf_explained_var: -0.5865018367767334
          vf_loss: 0.001543475619594877
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_restore: 33
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,33,392.794,33000,-0.0606061,0,-2,996.424


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-09-18_10-09-13
  done: false
  episode_len_mean: 996.5294117647059
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.058823529411764705
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 34
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3102658298280505
          entropy_coeff: 0.009999999999999998
          kl: 0.009285722359608888
          policy_loss: -0.08411404656039344
          total_loss: -0.10522564806871944
          vf_explained_var: -0.8286908268928528
          vf_loss: 0.0005981981516621697
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 34

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,34,402.921,34000,-0.0588235,0,-2,996.529


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-09-18_10-09-23
  done: false
  episode_len_mean: 996.6285714285714
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05714285714285714
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 35
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2585052437252466
          entropy_coeff: 0.009999999999999998
          kl: 0.012728553627258351
          policy_loss: -0.037526778142071435
          total_loss: -0.05784857821547323
          vf_explained_var: -0.8716360330581665
          vf_loss: 0.00035396884122747
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_restore: 35
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,35,413.168,35000,-0.0571429,0,-2,996.629


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-09-18_10-09-33
  done: false
  episode_len_mean: 996.7222222222222
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05555555555555555
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 36
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.27299751440684
          entropy_coeff: 0.009999999999999998
          kl: 0.009408744033687563
          policy_loss: -0.03726676627993584
          total_loss: -0.05702322791847918
          vf_explained_var: -0.6034050583839417
          vf_loss: 0.0015622004997567275
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,36,423.363,36000,-0.0555556,0,-2,996.722


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-09-18_10-09-43
  done: false
  episode_len_mean: 996.8108108108108
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05405405405405406
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 37
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.278931146197849
          entropy_coeff: 0.009999999999999998
          kl: 0.01164173757782585
          policy_loss: -0.013244051569037968
          total_loss: -0.03353134344021479
          vf_explained_var: -0.8039679527282715
          vf_loss: 0.0007557584891199237
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 37
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,37,433.414,37000,-0.0540541,0,-2,996.811


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-09-18_10-09-53
  done: false
  episode_len_mean: 996.8947368421053
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05263157894736842
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 38
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2992008288701373
          entropy_coeff: 0.009999999999999998
          kl: 0.012328957178665532
          policy_loss: -0.024265974366830454
          total_loss: -0.04488927427058419
          vf_explained_var: -0.9647973775863647
          vf_loss: 0.0005193639386561699
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_restore: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,38,443.594,38000,-0.0526316,0,-2,996.895


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-09-18_10-10-03
  done: false
  episode_len_mean: 996.974358974359
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05128205128205128
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 39
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.289631321695116
          entropy_coeff: 0.009999999999999998
          kl: 0.00927927369254916
          policy_loss: -0.03176774010062218
          total_loss: -0.053092758357524875
          vf_explained_var: -0.9371511936187744
          vf_loss: 0.00017940239324363777
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,39,453.669,39000,-0.0512821,0,-2,996.974


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-09-18_10-10-14
  done: false
  episode_len_mean: 997.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 40
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.287764321433173
          entropy_coeff: 0.009999999999999998
          kl: 0.011653843056801507
          policy_loss: 0.013590118371778065
          total_loss: -0.007083112125595411
          vf_explained_var: -1.0
          vf_loss: 0.00045633607062174836
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  node_ip: 192.168.3.5
  num_healthy_wor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,40,463.816,40000,-0.05,0,-2,997.05


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-09-18_10-10-24
  done: false
  episode_len_mean: 997.1219512195122
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04878048780487805
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 41
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2976620700624255
          entropy_coeff: 0.009999999999999998
          kl: 0.009117265284183404
          policy_loss: 0.026681849929607575
          total_loss: 0.005243268867747651
          vf_explained_var: -0.6900829672813416
          vf_loss: 0.00017044857175076483
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,41,474.086,41000,-0.0487805,0,-2,997.122


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-09-18_10-10-34
  done: false
  episode_len_mean: 997.1904761904761
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.047619047619047616
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 42
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.299887839953105
          entropy_coeff: 0.009999999999999998
          kl: 0.007687775520375527
          policy_loss: -0.016077745147049426
          total_loss: -0.03758685404641761
          vf_explained_var: -1.0
          vf_loss: 0.0003366022991637389
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,42,484.369,42000,-0.047619,0,-2,997.19


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-09-18_10-10-44
  done: false
  episode_len_mean: 997.2558139534884
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.046511627906976744
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 43
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.379678983158535
          entropy_coeff: 0.009999999999999998
          kl: 0.016250515255546875
          policy_loss: 0.08174675413303906
          total_loss: 0.06111873338619868
          vf_explained_var: -0.9726847410202026
          vf_loss: 0.0007311899407391643
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,43,494.144,43000,-0.0465116,0,-2,997.256


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-09-18_10-10-54
  done: false
  episode_len_mean: 997.3181818181819
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.045454545454545456
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 44
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.425692325168186
          entropy_coeff: 0.009999999999999998
          kl: 0.013473731710013818
          policy_loss: -0.025582917407155036
          total_loss: -0.047344146317078006
          vf_explained_var: -1.0
          vf_loss: 0.0004746318287086777
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 44
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,44,504.604,44000,-0.0454545,0,-2,997.318


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-09-18_10-11-05
  done: false
  episode_len_mean: 997.3777777777777
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.044444444444444446
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 45
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.354156960381402
          entropy_coeff: 0.009999999999999998
          kl: 0.012020968897928617
          policy_loss: 0.0065367976617481975
          total_loss: -0.0010093010341127714
          vf_explained_var: -1.0
          vf_loss: 0.014192326460357032
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,45,515.283,45000,-0.0444444,0,-2,997.378


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-09-18_10-11-16
  done: false
  episode_len_mean: 997.4347826086956
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.043478260869565216
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 46
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4274791320165
          entropy_coeff: 0.009999999999999998
          kl: 0.010063812323440195
          policy_loss: -0.05756257801420159
          total_loss: -0.07982082015110387
          vf_explained_var: -1.0
          vf_loss: 0.000506976311670668
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 46
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,46,526.062,46000,-0.0434783,0,-2,997.435


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-09-18_10-11-27
  done: false
  episode_len_mean: 997.4893617021277
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.0425531914893617
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 47
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.414933268229167
          entropy_coeff: 0.009999999999999998
          kl: 0.010678499376569799
          policy_loss: 0.002512597499622239
          total_loss: -0.01951062451634142
          vf_explained_var: -1.0
          vf_loss: 0.0005243349920621969
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,47,536.692,47000,-0.0425532,0,-2,997.489


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-09-18_10-11-37
  done: false
  episode_len_mean: 997.5416666666666
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.041666666666666664
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 48
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3959426482518515
          entropy_coeff: 0.009999999999999998
          kl: 0.01308219739717676
          policy_loss: -0.05078372359275818
          total_loss: -0.06946339685883787
          vf_explained_var: -0.9998812079429626
          vf_loss: 0.003317420218243367
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,48,547.21,48000,-0.0416667,0,-2,997.542


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-09-18_10-11-48
  done: false
  episode_len_mean: 997.5918367346939
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.061224489795918366
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 49
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3756461064020793
          entropy_coeff: 0.009999999999999998
          kl: 0.015832247600380375
          policy_loss: -0.03956616794069608
          total_loss: -0.009259256720542907
          vf_explained_var: -0.5134047269821167
          vf_loss: 0.051688535281250045
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,49,557.769,49000,-0.0612245,0,-2,997.592


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-09-18_10-11-58
  done: false
  episode_len_mean: 997.64
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.0
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 50
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2876119348737927
          entropy_coeff: 0.009999999999999998
          kl: 0.015275066666130993
          policy_loss: -0.024288906157016753
          total_loss: 0.007941013491815991
          vf_explained_var: -0.4320981800556183
          vf_loss: 0.052814781148400575
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 192.168.3.5
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,50,568.277,50000,0,3,-2,997.64


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-09-18_10-12-09
  done: false
  episode_len_mean: 997.6862745098039
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.0
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 51
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.381451718012492
          entropy_coeff: 0.009999999999999998
          kl: 0.010531038597114585
          policy_loss: -0.03148355678551727
          total_loss: -0.05050406774712934
          vf_explained_var: -0.7867043614387512
          vf_loss: 0.0032143491954128777
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,51,578.525,51000,0,3,-2,997.686


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-09-18_10-12-19
  done: false
  episode_len_mean: 997.7307692307693
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.019230769230769232
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 52
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4193272829055785
          entropy_coeff: 0.009999999999999998
          kl: 0.007318911992729099
          policy_loss: -0.033296646508905625
          total_loss: -0.05421226227449046
          vf_explained_var: -0.9658640623092651
          vf_loss: 0.0021798186301667656
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,52,588.749,52000,-0.0192308,3,-2,997.731


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-09-18_10-12-29
  done: false
  episode_len_mean: 997.7735849056604
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.018867924528301886
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 53
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3643340190251667
          entropy_coeff: 0.009999999999999998
          kl: 0.008690289520602345
          policy_loss: -0.0753146403365665
          total_loss: -0.09591665259665913
          vf_explained_var: -0.9879668354988098
          vf_loss: 0.0017377842204748756
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_restore: 53


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,53,598.867,53000,-0.0188679,3,-2,997.774


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-09-18_10-12-39
  done: false
  episode_len_mean: 997.8148148148148
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.018518518518518517
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 54
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3707481384277345
          entropy_coeff: 0.009999999999999998
          kl: 0.010769437253775724
          policy_loss: -0.05385129816002316
          total_loss: -0.07310596406459809
          vf_explained_var: -0.9533731937408447
          vf_loss: 0.002837399693412913
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,54,608.901,54000,-0.0185185,3,-2,997.815


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-09-18_10-12-49
  done: false
  episode_len_mean: 997.8545454545455
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.01818181818181818
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 55
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.374098367161221
          entropy_coeff: 0.009999999999999998
          kl: 0.010605229774074385
          policy_loss: -0.09519330544604196
          total_loss: -0.11536521017551422
          vf_explained_var: -0.7838355302810669
          vf_loss: 0.001978295959997922
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 55
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,55,619.068,55000,-0.0181818,3,-2,997.855


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-09-18_10-12-59
  done: false
  episode_len_mean: 997.8928571428571
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.017857142857142856
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.293845600552029
          entropy_coeff: 0.009999999999999998
          kl: 0.012454466318778293
          policy_loss: -0.07945406859119733
          total_loss: -0.09862838971118132
          vf_explained_var: -1.0
          vf_loss: 0.0018959650899180109
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_restore: 56
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,56,629.194,56000,-0.0178571,3,-2,997.893


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-09-18_10-13-09
  done: false
  episode_len_mean: 997.9298245614035
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.017543859649122806
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 57
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.316038070784675
          entropy_coeff: 0.009999999999999998
          kl: 0.010175945680196355
          policy_loss: -0.059258715622127055
          total_loss: -0.07914431442817052
          vf_explained_var: -1.0
          vf_loss: 0.0017483914076971512
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 57
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,57,639.265,57000,-0.0175439,3,-2,997.93


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-09-18_10-13-20
  done: false
  episode_len_mean: 997.9655172413793
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.13793103448275862
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 58
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2889952659606934
          entropy_coeff: 0.009999999999999998
          kl: 0.007750952367877571
          policy_loss: -0.2509398250116242
          total_loss: -0.2700495956672562
          vf_explained_var: -0.6447222232818604
          vf_loss: 0.0026175398353694215
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,58,649.478,58000,-0.137931,3,-7,997.966


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-09-18_10-13-30
  done: false
  episode_len_mean: 998.0
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.13559322033898305
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 59
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.328983391655816
          entropy_coeff: 0.009999999999999998
          kl: 0.01722646997004778
          policy_loss: -0.04321821936302715
          total_loss: -0.060315344027347034
          vf_explained_var: -0.6799799799919128
          vf_loss: 0.0036087414165700063
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,59,659.418,59000,-0.135593,3,-7,998




Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-09-18_10-13-56
  done: false
  episode_len_mean: 995.9666666666667
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.13333333333333333
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 60
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.156905542479621
          entropy_coeff: 0.009999999999999998
          kl: 0.015450866181032384
          policy_loss: -0.04976804123984443
          total_loss: -0.0660186661200391
          vf_explained_var: -0.7155047655105591
          vf_loss: 0.00300079708168697
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,60,686.176,60000,-0.133333,3,-7,995.967


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-09-18_10-14-07
  done: false
  episode_len_mean: 996.0327868852459
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.18032786885245902
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.24154335392846
          entropy_coeff: 0.009999999999999998
          kl: 0.021789675442439316
          policy_loss: 0.0019959689842330083
          total_loss: 0.01678810119628906
          vf_explained_var: -0.43423575162887573
          vf_loss: 0.033939114104335505
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,61,696.988,61000,-0.180328,3,-7,996.033


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-09-18_10-14-17
  done: false
  episode_len_mean: 996.0967741935484
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1774193548387097
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 62
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.302064220110575
          entropy_coeff: 0.009999999999999998
          kl: 0.011728631873496553
          policy_loss: -0.12853300505214268
          total_loss: -0.13969859840969245
          vf_explained_var: 0.21008874475955963
          vf_loss: 0.009216108845753803
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,62,706.403,62000,-0.177419,3,-7,996.097


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-09-18_10-14-26
  done: false
  episode_len_mean: 996.1587301587301
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1746031746031746
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 63
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.993754670355055
          entropy_coeff: 0.009999999999999998
          kl: 0.017320552444402646
          policy_loss: -0.04478741897684005
          total_loss: -0.058543177342249286
          vf_explained_var: -0.8688874840736389
          vf_loss: 0.0022846651257067505
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,63,715.935,63000,-0.174603,3,-7,996.159


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-09-18_10-14-36
  done: false
  episode_len_mean: 996.21875
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.171875
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 64
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.231626926528083
          entropy_coeff: 0.009999999999999998
          kl: 0.020322032428893008
          policy_loss: -0.08782083456818428
          total_loss: -0.10167825584713784
          vf_explained_var: -0.5363778471946716
          vf_loss: 0.003886389741415365
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,64,725.409,64000,-0.171875,3,-7,996.219


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-09-18_10-14-45
  done: false
  episode_len_mean: 996.276923076923
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.16923076923076924
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 65
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3119264629152085
          entropy_coeff: 0.009999999999999998
          kl: 0.014481094065915013
          policy_loss: -0.07502325707011753
          total_loss: -0.0685908564676841
          vf_explained_var: -0.15499289333820343
          vf_loss: 0.02466429522157543
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,65,734.657,65000,-0.169231,3,-7,996.277


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-09-18_10-14-54
  done: false
  episode_len_mean: 996.3333333333334
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.16666666666666666
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 66
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2600330008400813
          entropy_coeff: 0.009999999999999998
          kl: 0.010871170485477795
          policy_loss: -0.07687670385671987
          total_loss: -0.09290663893851969
          vf_explained_var: -0.15999555587768555
          vf_loss: 0.0029013742020146715
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,66,744.055,66000,-0.166667,3,-7,996.333


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-09-18_10-15-04
  done: false
  episode_len_mean: 996.3880597014926
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.16417910447761194
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 67
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1631225029627483
          entropy_coeff: 0.009999999999999998
          kl: 0.008114458839479057
          policy_loss: -0.00018779606454902224
          total_loss: -0.01674491481648551
          vf_explained_var: -0.6313847899436951
          vf_loss: 0.0023354756542378003
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,67,753.296,67000,-0.164179,3,-7,996.388


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-09-18_10-15-13
  done: false
  episode_len_mean: 996.4411764705883
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.16176470588235295
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 68
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3870711273617213
          entropy_coeff: 0.009999999999999998
          kl: 0.013484920642561442
          policy_loss: 0.0010379111601246727
          total_loss: -0.01219221285233895
          vf_explained_var: -0.005877804942429066
          vf_loss: 0.006089426557688664
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,68,762.832,68000,-0.161765,3,-7,996.441


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-09-18_10-15-23
  done: false
  episode_len_mean: 996.4927536231884
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.15942028985507245
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 69
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.176905319425795
          entropy_coeff: 0.009999999999999998
          kl: 0.010523906018436552
          policy_loss: -0.0286072658167945
          total_loss: -0.04579759066303571
          vf_explained_var: -0.6631717085838318
          vf_loss: 0.0010269101612114659
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,69,772.288,69000,-0.15942,3,-7,996.493


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-09-18_10-15-32
  done: false
  episode_len_mean: 996.5428571428571
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.15714285714285714
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 70
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.740238814883762
          entropy_coeff: 0.009999999999999998
          kl: 0.012888456061343164
          policy_loss: 0.005294753445519341
          total_loss: -0.005845177628927761
          vf_explained_var: -0.6892566084861755
          vf_loss: 0.0019126034257674796
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,70,781.812,70000,-0.157143,3,-7,996.543


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-09-18_10-15-42
  done: false
  episode_len_mean: 996.5915492957746
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.15492957746478872
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 71
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2099958525763617
          entropy_coeff: 0.009999999999999998
          kl: 0.0110070932846789
          policy_loss: -0.10320710506704119
          total_loss: -0.11439797828594843
          vf_explained_var: -0.3106539845466614
          vf_loss: 0.00719419246694694
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,71,791.164,71000,-0.15493,3,-7,996.592


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-09-18_10-15-51
  done: false
  episode_len_mean: 996.6388888888889
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1527777777777778
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 72
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4089283598793876
          entropy_coeff: 0.009999999999999998
          kl: 0.011029963944143685
          policy_loss: -0.011390405231051975
          total_loss: -0.028176533348030514
          vf_explained_var: -0.9600671529769897
          vf_loss: 0.0035805432436366875
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,72,800.662,72000,-0.152778,3,-7,996.639


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-09-18_10-16-01
  done: false
  episode_len_mean: 996.6849315068494
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1506849315068493
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 73
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.157845906416575
          entropy_coeff: 0.009999999999999998
          kl: 0.010486608374559982
          policy_loss: -0.05172949801716539
          total_loss: -0.06717738426393932
          vf_explained_var: -0.8243348002433777
          vf_loss: 0.0025913429587186934
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,73,810.162,73000,-0.150685,3,-7,996.685


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-09-18_10-16-10
  done: false
  episode_len_mean: 996.7297297297297
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.14864864864864866
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 74
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9251654426256815
          entropy_coeff: 0.009999999999999998
          kl: 0.011617248531512983
          policy_loss: -0.0710789515533381
          total_loss: -0.08407848810570108
          vf_explained_var: -1.0
          vf_loss: 0.002331296772333897
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,74,819.639,74000,-0.148649,3,-7,996.73


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-09-18_10-16-20
  done: false
  episode_len_mean: 996.7733333333333
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.14666666666666667
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 75
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5518976012865702
          entropy_coeff: 0.009999999999999998
          kl: 0.009440720620272201
          policy_loss: -0.05433649801545673
          total_loss: -0.06373752310044235
          vf_explained_var: -0.26456916332244873
          vf_loss: 0.0029317080832293465
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,75,829.177,75000,-0.146667,3,-7,996.773


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-09-18_10-16-29
  done: false
  episode_len_mean: 996.8157894736842
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.14473684210526316
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 76
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3670350895987617
          entropy_coeff: 0.009999999999999998
          kl: 0.01575771066045351
          policy_loss: -0.0493748653266165
          total_loss: -0.06500723374386629
          vf_explained_var: -0.3038804233074188
          vf_loss: 0.002719754211526985
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,76,838.798,76000,-0.144737,3,-7,996.816


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-09-18_10-16-39
  done: false
  episode_len_mean: 996.8571428571429
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.14285714285714285
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 77
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1603705909517075
          entropy_coeff: 0.009999999999999998
          kl: 0.014229375480824697
          policy_loss: -0.07690310184326436
          total_loss: -0.09124424705902735
          vf_explained_var: -0.283062607049942
          vf_loss: 0.0024601472690442784
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,77,848.577,77000,-0.142857,3,-7,996.857


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-09-18_10-16-49
  done: false
  episode_len_mean: 996.8974358974359
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.14102564102564102
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 78
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1822681758138867
          entropy_coeff: 0.009999999999999998
          kl: 0.012823828590207972
          policy_loss: -0.0396022324450314
          total_loss: -0.054329507570299834
          vf_explained_var: -0.6676658987998962
          vf_loss: 0.002767362846578989
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,78,858.184,78000,-0.141026,3,-7,996.897


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-09-18_10-16-58
  done: false
  episode_len_mean: 996.9367088607595
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.13924050632911392
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 79
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0501876632372538
          entropy_coeff: 0.009999999999999998
          kl: 0.013846790155795515
          policy_loss: -0.0917613323053552
          total_loss: -0.10419381814491417
          vf_explained_var: -0.41766104102134705
          vf_loss: 0.0033960985821775264
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,79,867.652,79000,-0.139241,3,-7,996.937


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-09-18_10-17-08
  done: false
  episode_len_mean: 996.975
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1375
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 80
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5064215077294243
          entropy_coeff: 0.009999999999999998
          kl: 0.009917298886241434
          policy_loss: -0.061112570431497364
          total_loss: -0.08036700333986017
          vf_explained_var: -0.9404802322387695
          vf_loss: 0.0024626916491090217
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,80,877.226,80000,-0.1375,3,-7,996.975


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-09-18_10-17-17
  done: false
  episode_len_mean: 997.0123456790124
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.13580246913580246
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 81
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9302699353959825
          entropy_coeff: 0.009999999999999998
          kl: 0.012640334276966234
          policy_loss: -0.01683764590157403
          total_loss: -0.029392540951569877
          vf_explained_var: -1.0
          vf_loss: 0.002481691470731878
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_restore: 81
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,81,886.686,81000,-0.135802,3,-7,997.012


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-09-18_10-17-27
  done: false
  episode_len_mean: 997.0487804878048
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.13414634146341464
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 82
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3029034561581083
          entropy_coeff: 0.009999999999999998
          kl: 0.014199022656020391
          policy_loss: -0.0972621076222923
          total_loss: -0.10837482195347548
          vf_explained_var: -0.32009801268577576
          vf_loss: 0.0071241508833029205
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,82,896.144,82000,-0.134146,3,-7,997.049


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-09-18_10-17-36
  done: false
  episode_len_mean: 997.0843373493976
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.13253012048192772
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 83
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9952758285734389
          entropy_coeff: 0.009999999999999998
          kl: 0.011374155246743189
          policy_loss: -0.11684016349414984
          total_loss: -0.13053088424106438
          vf_explained_var: -0.7157347202301025
          vf_loss: 0.002423258366373678
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,83,905.714,83000,-0.13253,3,-7,997.084


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-09-18_10-17-46
  done: false
  episode_len_mean: 997.1190476190476
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.13095238095238096
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 84
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.08747095796797
          entropy_coeff: 0.009999999999999998
          kl: 0.01031124191841687
          policy_loss: -0.02929033376276493
          total_loss: -0.04488177825179365
          vf_explained_var: -1.0
          vf_loss: 0.0018032185215916899
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore: 84
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,84,915.187,84000,-0.130952,3,-7,997.119


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-09-18_10-17-56
  done: false
  episode_len_mean: 997.1529411764706
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.12941176470588237
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 85
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1857131481170655
          entropy_coeff: 0.009999999999999998
          kl: 0.013090514691074872
          policy_loss: -0.06537847821083334
          total_loss: -0.07783794005711873
          vf_explained_var: -0.4576722979545593
          vf_loss: 0.004979620341004597
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,85,924.831,85000,-0.129412,3,-7,997.153


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-09-18_10-18-05
  done: false
  episode_len_mean: 997.1860465116279
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.12790697674418605
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 86
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0184190551439922
          entropy_coeff: 0.009999999999999998
          kl: 0.012316577307518983
          policy_loss: -0.11867087735897965
          total_loss: -0.13204878196120262
          vf_explained_var: -0.5750837326049805
          vf_loss: 0.002649440100261321
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,86,934.524,86000,-0.127907,3,-7,997.186


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-09-18_10-18-15
  done: false
  episode_len_mean: 997.2183908045977
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.12643678160919541
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 87
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2169319046868217
          entropy_coeff: 0.009999999999999998
          kl: 0.012042088527288216
          policy_loss: -0.09571726541552279
          total_loss: -0.1106333123313056
          vf_explained_var: -0.044689204543828964
          vf_loss: 0.003189068467408005
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,87,944.127,87000,-0.126437,3,-7,997.218


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-09-18_10-18-24
  done: false
  episode_len_mean: 997.25
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.125
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 88
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2118651813930934
          entropy_coeff: 0.009999999999999998
          kl: 0.012470235730030312
          policy_loss: -0.03877365338719553
          total_loss: -0.05469950990130504
          vf_explained_var: -0.455881804227829
          vf_loss: 0.0019840891615280675
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,88,953.571,88000,-0.125,3,-7,997.25


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-09-18_10-18-34
  done: false
  episode_len_mean: 997.2808988764045
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.12359550561797752
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 89
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2655070622762046
          entropy_coeff: 0.009999999999999998
          kl: 0.012933785004902557
          policy_loss: -0.04137669038027525
          total_loss: -0.05633626545055045
          vf_explained_var: -0.2809304893016815
          vf_loss: 0.0033303407428320496
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,89,963.464,89000,-0.123596,3,-7,997.281




Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-09-18_10-19-01
  done: false
  episode_len_mean: 995.8222222222222
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.12222222222222222
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 90
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.248350397745768
          entropy_coeff: 0.009999999999999998
          kl: 0.014040662193493765
          policy_loss: -0.040853378176689145
          total_loss: -0.0531912926170561
          vf_explained_var: -0.15068352222442627
          vf_loss: 0.005406865075282339
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,90,989.87,90000,-0.122222,3,-7,995.822


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-09-18_10-19-15
  done: false
  episode_len_mean: 995.8681318681319
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.12087912087912088
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 91
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1808314270443385
          entropy_coeff: 0.009999999999999998
          kl: 0.01238612560560585
          policy_loss: -0.010459888436728054
          total_loss: -0.02530612550261948
          vf_explained_var: -0.426871657371521
          vf_loss: 0.0027817552249568204
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,91,1003.98,91000,-0.120879,3,-7,995.868


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-09-18_10-19-25
  done: false
  episode_len_mean: 995.9130434782609
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.11956521739130435
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 92
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3976521094640098
          entropy_coeff: 0.009999999999999998
          kl: 0.009352428623911103
          policy_loss: -0.02395745155711969
          total_loss: -0.04330570863352882
          vf_explained_var: -0.9853339195251465
          vf_loss: 0.0014718185376194823
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,92,1013.93,92000,-0.119565,3,-7,995.913


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-09-18_10-19-35
  done: false
  episode_len_mean: 995.9569892473119
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.11827956989247312
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 93
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2791106012132434
          entropy_coeff: 0.009999999999999998
          kl: 0.016022829712035353
          policy_loss: -0.07448436406751474
          total_loss: -0.08769543096423149
          vf_explained_var: -0.054530609399080276
          vf_loss: 0.004172335058036778
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,93,1023.93,93000,-0.11828,3,-7,995.957


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-09-18_10-19-45
  done: false
  episode_len_mean: 996.0
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.11702127659574468
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 94
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3146308369106716
          entropy_coeff: 0.009999999999999998
          kl: 0.01920597025660662
          policy_loss: -0.08226198479533195
          total_loss: -0.08905006564325757
          vf_explained_var: 0.04210689291357994
          vf_loss: 0.009876211318704817
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,94,1033.7,94000,-0.117021,3,-7,996


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-09-18_10-19-54
  done: false
  episode_len_mean: 996.0421052631579
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.11578947368421053
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 95
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.224857375356886
          entropy_coeff: 0.009999999999999998
          kl: 0.010713747562378452
          policy_loss: -0.09158550028999647
          total_loss: -0.10748129594657156
          vf_explained_var: -0.5530575513839722
          vf_loss: 0.002736884676333931
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,95,1043.44,95000,-0.115789,3,-7,996.042


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-09-18_10-20-04
  done: false
  episode_len_mean: 996.0833333333334
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.11458333333333333
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 96
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2196968264049954
          entropy_coeff: 0.009999999999999998
          kl: 0.011751038271460403
          policy_loss: -0.0517162831293212
          total_loss: -0.06720271454089217
          vf_explained_var: -0.723035454750061
          vf_loss: 0.002744561880050848
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,96,1052.96,96000,-0.114583,3,-7,996.083


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-09-18_10-20-13
  done: false
  episode_len_mean: 996.1237113402062
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1134020618556701
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 97
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3587436119715375
          entropy_coeff: 0.009999999999999998
          kl: 0.01514238928838717
          policy_loss: -0.038317670631739825
          total_loss: -0.054448519233200286
          vf_explained_var: 0.1852199137210846
          vf_loss: 0.002346030065220677
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,97,1062.43,97000,-0.113402,3,-7,996.124


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-09-18_10-20-23
  done: false
  episode_len_mean: 996.1632653061224
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.11224489795918367
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 98
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.406292457050747
          entropy_coeff: 0.009999999999999998
          kl: 0.009447756607495453
          policy_loss: -0.06978459830085436
          total_loss: -0.08877134608725706
          vf_explained_var: -0.5617446303367615
          vf_loss: 0.0018875559584961997
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,98,1072.18,98000,-0.112245,3,-7,996.163


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-09-18_10-20-33
  done: false
  episode_len_mean: 996.2020202020202
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1111111111111111
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 99
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4170390870836047
          entropy_coeff: 0.009999999999999998
          kl: 0.012479133197284267
          policy_loss: 0.026145242692695723
          total_loss: 0.007764018601220515
          vf_explained_var: -0.45132899284362793
          vf_loss: 0.0015774552257628077
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,99,1081.93,99000,-0.111111,3,-7,996.202


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-09-18_10-20-43
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.11
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 100
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3702504131529065
          entropy_coeff: 0.009999999999999998
          kl: 0.01530826646832624
          policy_loss: -0.049971630051732066
          total_loss: -0.06614410252206855
          vf_explained_var: -0.2079268991947174
          vf_loss: 0.0023634914114760855
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,100,1091.72,100000,-0.11,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-09-18_10-20-53
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.09
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 101
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3454988532596164
          entropy_coeff: 0.009999999999999998
          kl: 0.009368502779101204
          policy_loss: -0.024170911353495386
          total_loss: -0.04282171010143227
          vf_explained_var: -0.38917243480682373
          vf_loss: 0.0016423184010717605
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,101,1101.41,101000,-0.09,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-09-18_10-21-02
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.09
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 102
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1906018071704443
          entropy_coeff: 0.009999999999999998
          kl: 0.011588106536698107
          policy_loss: -0.06915400291068687
          total_loss: -0.08344121244218615
          vf_explained_var: -0.8119156956672668
          vf_loss: 0.0037078192367011476
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,102,1110.8,102000,-0.09,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-09-18_10-21-12
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.09
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 103
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3332026455137465
          entropy_coeff: 0.009999999999999998
          kl: 0.015461548619634552
          policy_loss: -0.013269064037336244
          total_loss: -0.030353167653083803
          vf_explained_var: 0.2256825566291809
          vf_loss: 0.0010296512737921956
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,103,1120.44,103000,-0.09,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-09-18_10-21-21
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.09
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 104
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3891367435455324
          entropy_coeff: 0.009999999999999998
          kl: 0.011973381691388596
          policy_loss: 0.04211009720133411
          total_loss: 0.022900828760531212
          vf_explained_var: 0.2831714451313019
          vf_loss: 0.0006410822857611089
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,104,1130.21,104000,-0.09,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-09-18_10-21-31
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.09
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 105
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.052562508318159
          entropy_coeff: 0.009999999999999998
          kl: 0.012811820025557866
          policy_loss: -0.11291993624634213
          total_loss: -0.12625502070619
          vf_explained_var: -0.5048343539237976
          vf_loss: 0.0028665516641922296
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,105,1139.79,105000,-0.09,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-09-18_10-21-41
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 106
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1587016661961873
          entropy_coeff: 0.009999999999999998
          kl: 0.012932984964331122
          policy_loss: -0.02919426483826505
          total_loss: -0.03497290106283294
          vf_explained_var: 0.4042890667915344
          vf_loss: 0.011443499246767411
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,106,1149.67,106000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-09-18_10-21-51
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 107
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.238070731692844
          entropy_coeff: 0.009999999999999998
          kl: 0.012155563361255685
          policy_loss: -0.02121853240662151
          total_loss: -0.03619371659070667
          vf_explained_var: 0.13392654061317444
          vf_loss: 0.003303020238591772
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,107,1159.59,107000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-09-18_10-22-01
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 108
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.136349826388889
          entropy_coeff: 0.009999999999999998
          kl: 0.015706336186954276
          policy_loss: -0.09533599629584286
          total_loss: -0.10808177679363225
          vf_explained_var: 0.3203541338443756
          vf_loss: 0.0033168294686927564
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,108,1169.24,108000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-09-18_10-22-10
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 109
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1985828744040594
          entropy_coeff: 0.009999999999999998
          kl: 0.01251144518320683
          policy_loss: -0.03589234302441279
          total_loss: -0.05131308651632733
          vf_explained_var: -0.6736879348754883
          vf_loss: 0.0023424722719937565
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,109,1178.87,109000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-09-18_10-22-20
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 110
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2607067796919083
          entropy_coeff: 0.009999999999999998
          kl: 0.01156823888619621
          policy_loss: 0.012992616184055806
          total_loss: -0.004464457722173797
          vf_explained_var: -0.6268162727355957
          vf_loss: 0.001245710513709734
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,110,1188.57,110000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-09-18_10-22-29
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 111
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2204819546805488
          entropy_coeff: 0.009999999999999998
          kl: 0.012009975130145309
          policy_loss: -0.02577392670015494
          total_loss: -0.04288282692432403
          vf_explained_var: -0.33106938004493713
          vf_loss: 0.0010425510900353807
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,111,1198.01,111000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-09-18_10-22-39
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 112
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.195283304320441
          entropy_coeff: 0.009999999999999998
          kl: 0.012678977669102344
          policy_loss: -0.011234678824742635
          total_loss: -0.02672585758070151
          vf_explained_var: -0.75128573179245
          vf_loss: 0.0021824983226704513
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,112,1207.51,112000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-09-18_10-22-48
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 113
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.03147318760554
          entropy_coeff: 0.009999999999999998
          kl: 0.014422635364251373
          policy_loss: -0.05765251744952467
          total_loss: -0.07128473669290543
          vf_explained_var: -0.5591233372688293
          vf_loss: 0.001814873470316848
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,113,1217.01,113000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-09-18_10-22-58
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 114
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8005240930451287
          entropy_coeff: 0.009999999999999998
          kl: 0.012943879572431004
          policy_loss: 0.00804068711068895
          total_loss: -0.0035160446746481788
          vf_explained_var: -0.05438245087862015
          vf_loss: 0.0020799548002994723
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,114,1226.5,114000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-09-18_10-23-07
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 115
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.944789997736613
          entropy_coeff: 0.009999999999999998
          kl: 0.015942453237049292
          policy_loss: -0.032724656578567296
          total_loss: -0.04422980517976814
          vf_explained_var: -0.13467155396938324
          vf_loss: 0.0025621695663883455
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,115,1236.07,115000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-09-18_10-23-17
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 116
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2152086363898382
          entropy_coeff: 0.009999999999999998
          kl: 0.0123266943658897
          policy_loss: -0.014744355902075767
          total_loss: -0.031724430951807235
          vf_explained_var: -0.3380115032196045
          vf_loss: 0.0010117540433485475
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,116,1245.54,116000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-09-18_10-23-26
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 117
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.903273171848721
          entropy_coeff: 0.009999999999999998
          kl: 0.01490904797624042
          policy_loss: -0.10905911365730894
          total_loss: -0.12067067018813557
          vf_explained_var: -0.29598376154899597
          vf_loss: 0.002389371843956825
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,117,1255.02,117000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-09-18_10-23-36
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 118
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.048433526357015
          entropy_coeff: 0.009999999999999998
          kl: 0.013237169208938127
          policy_loss: -0.027378410732166635
          total_loss: -0.04057334938810931
          vf_explained_var: -0.5003200173377991
          vf_loss: 0.002821851820529749
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,118,1264.48,118000,-0.1,3,-7,996.24


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-09-18_10-23-45
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 119
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2815362188551163
          entropy_coeff: 0.009999999999999998
          kl: 0.015556366305028446
          policy_loss: -0.13137154032786688
          total_loss: -0.14804957293801838
          vf_explained_var: -0.36670222878456116
          vf_loss: 0.000887052981771477
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,119,1273.88,119000,-0.1,3,-7,996.24




Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-09-18_10-24-12
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 120
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7926315599017673
          entropy_coeff: 0.009999999999999998
          kl: 0.011571806959652993
          policy_loss: -0.06255546121133698
          total_loss: -0.07482656033502685
          vf_explained_var: -0.29377052187919617
          vf_loss: 0.0017497330929877029
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,120,1300.41,120000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-09-18_10-24-23
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 121
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9004017949104308
          entropy_coeff: 0.009999999999999998
          kl: 0.019933723256215223
          policy_loss: -0.08303491115156147
          total_loss: -0.09287803322076797
          vf_explained_var: 0.08970654010772705
          vf_loss: 0.002433265105355531
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,121,1310.98,121000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-09-18_10-24-32
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 122
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6416691766844855
          entropy_coeff: 0.009999999999999998
          kl: 0.015290371289261234
          policy_loss: -0.04751491298278173
          total_loss: -0.0556906070974138
          vf_explained_var: -0.19546794891357422
          vf_loss: 0.0030805002328836255
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,122,1320.27,122000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-09-18_10-24-41
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 123
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6769129051102531
          entropy_coeff: 0.009999999999999998
          kl: 0.008027170831247184
          policy_loss: -0.0038810446858406067
          total_loss: -0.015816741809248924
          vf_explained_var: -0.1763889491558075
          vf_loss: 0.0021242604086486
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,123,1329.32,123000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-09-18_10-24-50
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 124
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9074831816885207
          entropy_coeff: 0.009999999999999998
          kl: 0.012276440349853132
          policy_loss: -0.008773613162338734
          total_loss: -0.02068428887675206
          vf_explained_var: -0.5198344588279724
          vf_loss: 0.003020857070158753
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,124,1338.39,124000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-09-18_10-24-59
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 125
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.166566963990529
          entropy_coeff: 0.009999999999999998
          kl: 0.014849692619884121
          policy_loss: 0.01771062132385042
          total_loss: 0.003941038416491615
          vf_explained_var: 0.11487376689910889
          vf_loss: 0.002884318660168598
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,125,1347.61,125000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-09-18_10-25-09
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 126
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9185214175118341
          entropy_coeff: 0.009999999999999998
          kl: 0.01357564970726247
          policy_loss: 0.0690712066160308
          total_loss: 0.05644264734453625
          vf_explained_var: -0.10656964033842087
          vf_loss: 0.00197487366773809
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,126,1357.05,126000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-09-18_10-25-18
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 127
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2159938388400606
          entropy_coeff: 0.009999999999999998
          kl: 0.020442763721780367
          policy_loss: -0.0038171343505382536
          total_loss: -0.015505287465122012
          vf_explained_var: -0.6180160641670227
          vf_loss: 0.0035723543473674606
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,127,1366.4,127000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-09-18_10-25-28
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 128
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8719362788730198
          entropy_coeff: 0.009999999999999998
          kl: 0.009766862901539507
          policy_loss: -0.10272436779406335
          total_loss: -0.11440858410464393
          vf_explained_var: -0.705836832523346
          vf_loss: 0.0020906725245165743
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,128,1375.82,128000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-09-18_10-25-37
  done: false
  episode_len_mean: 994.85
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 129
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.070508886708154
          entropy_coeff: 0.009999999999999998
          kl: 0.014659179829892652
          policy_loss: -0.035004354692581625
          total_loss: -0.03865101724449131
          vf_explained_var: 0.13776853680610657
          vf_loss: 0.00963721628844117
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,129,1385.03,129000,-0.1,3,-7,994.85


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-09-18_10-25-46
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 130
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9902697020106845
          entropy_coeff: 0.009999999999999998
          kl: 0.012584147479582935
          policy_loss: -0.04036878148714702
          total_loss: -0.05050702235764927
          vf_explained_var: 0.6147114038467407
          vf_loss: 0.0033937330616431102
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,130,1394.36,130000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-09-18_10-25-56
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 131
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6989320251676772
          entropy_coeff: 0.009999999999999998
          kl: 0.008523167467535325
          policy_loss: -0.056571107978622116
          total_loss: -0.06758367969757981
          vf_explained_var: -0.12957227230072021
          vf_loss: 0.0016618984311612115
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,131,1403.79,131000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-09-18_10-26-05
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 132
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8825081864992776
          entropy_coeff: 0.009999999999999998
          kl: 0.012833961562163133
          policy_loss: -0.02297904549373521
          total_loss: -0.030259071704414157
          vf_explained_var: -0.2087603509426117
          vf_loss: 0.005047860413065387
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,132,1413.18,132000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-09-18_10-26-14
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 133
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.971179927719964
          entropy_coeff: 0.009999999999999998
          kl: 0.010757240102819072
          policy_loss: -0.10405483750833405
          total_loss: -0.11595736688209904
          vf_explained_var: 0.08327030390501022
          vf_loss: 0.0023634174812792075
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,133,1422.47,133000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-09-18_10-26-24
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 134
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7755383398797777
          entropy_coeff: 0.009999999999999998
          kl: 0.011291772685214991
          policy_loss: -0.1165037399985724
          total_loss: -0.12484854602565368
          vf_explained_var: 0.16474750638008118
          vf_loss: 0.003694115740816212
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,134,1431.81,134000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-09-18_10-26-33
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 135
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2063065780533684
          entropy_coeff: 0.009999999999999998
          kl: 0.013702047742756636
          policy_loss: -0.06944526640905274
          total_loss: -0.08092948959933387
          vf_explained_var: 0.07246273756027222
          vf_loss: 0.0036421808750472136
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,135,1441.15,135000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-09-18_10-26-43
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 136
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.61362627281083
          entropy_coeff: 0.009999999999999998
          kl: 0.010932732894461773
          policy_loss: -0.07514935069613987
          total_loss: -0.08350292278660668
          vf_explained_var: -0.1857258528470993
          vf_loss: 0.002247992540166403
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,136,1450.67,136000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-09-18_10-26-52
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 137
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7988484170701768
          entropy_coeff: 0.009999999999999998
          kl: 0.008471773568183988
          policy_loss: -0.00816361726158195
          total_loss: -0.020159576419326995
          vf_explained_var: -0.4965408444404602
          vf_loss: 0.0017036884150002153
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,137,1460.02,137000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-09-18_10-27-01
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 138
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1532381415367126
          entropy_coeff: 0.009999999999999998
          kl: 0.015103910924184187
          policy_loss: -0.054317477448946896
          total_loss: -0.0659006884528531
          vf_explained_var: -0.38337206840515137
          vf_loss: 0.0023028162928918996
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,138,1469.36,138000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-09-18_10-27-11
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 139
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.331081189049615
          entropy_coeff: 0.009999999999999998
          kl: 0.00881206997867881
          policy_loss: -0.06376511667751604
          total_loss: -0.08093607914116648
          vf_explained_var: -0.783125638961792
          vf_loss: 0.0016787370033044782
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,139,1478.64,139000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-09-18_10-27-20
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 140
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2185221089257134
          entropy_coeff: 0.009999999999999998
          kl: 0.010929747582145311
          policy_loss: 0.04124467602620522
          total_loss: 0.028519010978440444
          vf_explained_var: -0.6179059147834778
          vf_loss: 0.003926370946121299
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,140,1488.16,140000,-0.1,3,-7,996.03


Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-09-18_10-27-30
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 141
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2524225976732044
          entropy_coeff: 0.009999999999999998
          kl: 0.01411872601177697
          policy_loss: 0.037227728962898256
          total_loss: 0.0241760298402773
          vf_explained_var: -0.3649364113807678
          vf_loss: 0.0023249186303776997
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,141,1497.85,141000,-0.1,3,-7,996.03




Result for PPO_my_env_82e40_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-09-18_10-27-40
  done: false
  episode_len_mean: 996.03
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: -0.1
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 142
  experiment_id: 7b9a32d98e74411ea92d43be64b53a03
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.053322588072883
          entropy_coeff: 0.009999999999999998
          kl: 0.008812955232567631
          policy_loss: 0.07002799674454663
          total_loss: 0.054626470452381504
          vf_explained_var: 0.33093151450157166
          vf_loss: 0.0006701410867713599
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,142,1507.51,142000,-0.1,3,-7,996.03


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_82e40_00000,RUNNING,192.168.3.5:35494,142,1507.51,142000,-0.1,3,-7,996.03


[2m[36m(pid=35495)[0m 2021-09-18 10:27:40,094	ERROR worker.py:428 -- SystemExit was raised from the worker
[2m[36m(pid=35495)[0m Traceback (most recent call last):
[2m[36m(pid=35495)[0m   File "python/ray/_raylet.pyx", line 640, in ray._raylet.task_execution_handler
[2m[36m(pid=35495)[0m   File "python/ray/_raylet.pyx", line 488, in ray._raylet.execute_task
[2m[36m(pid=35495)[0m   File "python/ray/_raylet.pyx", line 525, in ray._raylet.execute_task
[2m[36m(pid=35495)[0m   File "python/ray/_raylet.pyx", line 532, in ray._raylet.execute_task
[2m[36m(pid=35495)[0m   File "python/ray/_raylet.pyx", line 536, in ray._raylet.execute_task
[2m[36m(pid=35495)[0m   File "python/ray/_raylet.pyx", line 486, in ray._raylet.execute_task.function_executor
[2m[36m(pid=35495)[0m   File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/_private/function_manager.py", line 563, in actor_method_executor
[2m[36m(pid=35495)[0m     return method(__ray_actor, *args, **kwar

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Exception ignored in: <function WandbLoggerCallback.__del__ at 0x7f4ec1a691e0>
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/integration/wandb.py", line 378, in __del__
    for trial in self._trial_processes:
RuntimeError: dictionary changed size during iteration


<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x7f4ec1d8de10>

0,1
agent_timesteps_total,142000.0
episode_len_mean,996.03
episode_reward_max,3.0
episode_reward_mean,-0.1
episode_reward_min,-7.0
episodes_this_iter,1.0
episodes_total,142.0
info/learner/default_policy/learner_stats/cur_kl_coeff,0.50625
info/learner/default_policy/learner_stats/cur_lr,5e-05
info/learner/default_policy/learner_stats/entropy,2.05332


0,1
agent_timesteps_total,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
episode_len_mean,████████▃▃▄▄▄▅▅▅▅▃▃▃▃▄▄▄▄▂▃▃▃▃▃▃▃▁▁▁▃▃▃▃
episode_reward_max,▁▁▁▁▁▁▁▁▁▁▁▁▁▁██████████████████████████
episode_reward_mean,▁▅▆▇▇▇▇█████████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
episode_reward_min,████████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
episodes_this_iter,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
episodes_total,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
info/learner/default_policy/learner_stats/cur_kl_coeff,▂▂▂▂▂▂▄▁▁▁▁▁▁▁▁▁▁▂▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅████
info/learner/default_policy/learner_stats/cur_lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
info/learner/default_policy/learner_stats/entropy,▁▄▆▅▆▆█▆▇▇▆▆▇▇▇▇▆▇▆▃▇▇█▅▆▆▇▇▆▆▅▆▆▃▄▄▄▃▆▅
