In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional feature extractor for batches of 3-channel 2D inputs.

    Five unpadded ``Conv2d`` + ``ReLU`` stages followed by ``Flatten``; the
    whole stack lives in a single ``nn.Sequential`` stored as ``self.cnn``.
    """

    def __init__(self):
        super().__init__()

        # One row per conv stage: (in_channels, out_channels, kernel_size, stride).
        conv_specs = (
            (3, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
            (64, 64, 3, 1),
            (64, 512, 2, 1),
        )

        stages = []
        for in_ch, out_ch, kernel, step in conv_specs:
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=step, padding=0))
            stages.append(nn.ReLU())
        stages.append(nn.Flatten())

        # Positions inside the Sequential define the state_dict keys
        # (cnn.0.*, cnn.2.*, ...), so the stage order must stay as listed.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Pass ``x`` through the conv stack and return the flattened output."""
        features = self.cnn(x)
        return features

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: a pretrained ``VisualEncoder`` feeding two linear heads.

    The encoder output (512 features) is shared by ``action_head`` (one logit
    per discrete action, ``action_space.n``) and ``value_head`` (a single
    scalar per sample). The value computed during ``forward`` is cached and
    handed back by ``value_function``.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name,
                 encoder_weights_path="/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth"):
        """Build the encoder and heads and load the pretrained encoder weights.

        Args:
            obs_space, action_space, num_outputs, model_config, name:
                forwarded unchanged to ``TorchModelV2.__init__``.
                ``action_space`` must expose ``.n``.
            encoder_weights_path: path of the ``state_dict`` file loaded into
                the encoder. Defaults to the location used originally; can be
                overridden through the constructor kwargs.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512
        self.encoder = VisualEncoder()
        # Load on CPU first so the checkpoint can be read on machines without a GPU.
        self.encoder.load_state_dict(
            torch.load(encoder_weights_path, map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Filled in by forward(); read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits for ``input_dict['obs']`` and cache the value estimate.

        Args:
            input_dict: mapping with an ``'obs'`` tensor of rank 4 whose last
                axis is moved to position 1 before the convolutions
                (i.e. channels-last input).
            state: passed through untouched.
            seq_lens: unused.

        Returns:
            ``(action_logits, state)``.
        """
        # Channels-last -> channels-first; scale by 1/255
        # (presumably 0-255 pixel values — confirm against the env wrapper).
        obs = input_dict['obs'].permute(0, 3, 1, 2).float() / 255.0
        # Tensor.cuda()/Tensor.to() return a new tensor rather than moving in
        # place, so the result must be assigned. Follow the encoder's actual
        # device instead of assuming CUDA, in case the model was moved after
        # construction.
        obs = obs.to(next(self.encoder.parameters()).device)

        features = self.encoder(obs)
        action = self.action_head(features)
        # (batch, 1) -> (batch,)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the most recent ``forward`` call.

        Raises:
            AssertionError: if ``forward`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the name "my_torch_model" so a trainer config can
# select it via the "custom_model" key.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os

# Order CUDA devices by PCI bus id and expose only device "0" to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "0",
})

def env_creator(env_config):
    """Create the ``IGLUSilentBuilder-v0`` environment with POV-only and action wrappers.

    Args:
        env_config: optional mapping of overrides (may be empty or ``None``).
            Recognised keys:
              * ``"max_steps"``   - passed to ``gym.make`` (default ``1000``)
              * ``"task_preset"`` - list of task ids for ``TaskSet(preset=...)``
                (default ``['C17']``)

    Returns:
        The environment wrapped in ``PovOnlyWrapper`` and then ``IgluActionWrapper``.
    """
    # Previously env_config was ignored; fall back to the original constants
    # so existing callers see no change.
    options = env_config or {}
    max_steps = options.get("max_steps", 1000)
    task_preset = list(options.get("task_preset", ['C17']))

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = IgluActionWrapper(env)
    return env

from ray.tune.registry import register_env
# Register env_creator under the name "my_env" so a trainer config can refer to
# the environment by that string in its "env" key.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Network selection for the policy.
custom_model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# Weights & Biases run identification.
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO C17 pretrained (AnnaCNN)",
}

# Full PPO trainer configuration handed to Tune.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": custom_model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# Launch training; results are also reported through WandbLogger.
analysis = tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])

2021-09-20 09:47:46,202	INFO wandb.py:170 -- Already logged into W&B.
2021-09-20 09:47:46,218	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_cede2_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)


[2m[36m(pid=526121)[0m 2021-09-20 09:47:50,205	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=526121)[0m 2021-09-20 09:47:50,206	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-09-20_09-48-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5858139713605246
          entropy_coeff: 0.009999999999999998
          kl: 0.004197614454803953
          policy_loss: -0.15425816666748787
          total_loss: -0.14535930887278584
          vf_explained_var: -0.21579505503177643
          vf_loss: 0.023917475492796963
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,1,58.6512,1000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-09-20_09-49-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6194965256585014
          entropy_coeff: 0.009999999999999998
          kl: 0.005532206874099662
          policy_loss: -0.17034249471293555
          total_loss: -0.17103961118393474
          vf_explained_var: 0.03682316839694977
          vf_loss: 0.014944628553671969
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,2,69.563,2000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-09-20_09-49-17
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5953680872917175
          entropy_coeff: 0.009999999999999998
          kl: 0.014964428368584571
          policy_loss: -0.17718921684556538
          total_loss: -0.18015459477901458
          vf_explained_var: -0.05769778788089752
          vf_loss: 0.011491860014696915
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,3,80.1374,3000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-09-20_09-49-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 4
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5541441228654649
          entropy_coeff: 0.009999999999999998
          kl: 0.00809386123389992
          policy_loss: -0.17818717277712293
          total_loss: -0.18522295413745773
          vf_explained_var: -0.12410946190357208
          vf_loss: 0.007696273977247378
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,4,90.5591,4000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-09-20_09-49-38
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 5
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.558564551671346
          entropy_coeff: 0.009999999999999998
          kl: 0.009473096462409892
          policy_loss: -0.1829755362537172
          total_loss: -0.1923202234837744
          vf_explained_var: 0.08727315068244934
          vf_loss: 0.005293649841203458
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 192.168.1.100
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,5,100.88,5000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-09-20_09-49-48
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5372863544358149
          entropy_coeff: 0.009999999999999998
          kl: 0.013453825122942121
          policy_loss: -0.18222500185171764
          total_loss: -0.1923919356531567
          vf_explained_var: -0.5111744999885559
          vf_loss: 0.0038605467028295
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip: 192.168.1.100
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,6,111.322,6000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-09-20_09-49-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 7
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5287842843267652
          entropy_coeff: 0.009999999999999998
          kl: 0.006990541210717617
          policy_loss: -0.1392275402115451
          total_loss: -0.1504151001572609
          vf_explained_var: -0.24355006217956543
          vf_loss: 0.003401228506118059
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,7,121.74,7000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-09-20_09-50-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 8
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5058212187555102
          entropy_coeff: 0.009999999999999998
          kl: 0.006159087489985361
          policy_loss: -0.17018505103058285
          total_loss: -0.18209755222002666
          vf_explained_var: 0.13511773943901062
          vf_loss: 0.0025298025853569724
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,8,132.037,8000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-09-20_09-50-19
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 9
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.484717889626821
          entropy_coeff: 0.009999999999999998
          kl: 0.008099556977441925
          policy_loss: -0.16359848495986726
          total_loss: -0.1758522735701667
          vf_explained_var: -0.7904955744743347
          vf_loss: 0.0017834352219425556
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,9,142.351,9000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-09-20_09-50-29
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 10
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.377723713715871
          entropy_coeff: 0.009999999999999998
          kl: 0.007246564249931511
          policy_loss: -0.15296834210554758
          total_loss: -0.16443105770481958
          vf_explained_var: -0.3074095547199249
          vf_loss: 0.0015898620877932343
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,10,152.619,10000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-09-20_09-50-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 11
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3611518541971843
          entropy_coeff: 0.009999999999999998
          kl: 0.0077571270873462855
          policy_loss: -0.1761245269742277
          total_loss: -0.18774964983264605
          vf_explained_var: -0.7157691717147827
          vf_loss: 0.0012106834471018778
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,11,162.892,11000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-09-20_09-50-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 12
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3046043621169197
          entropy_coeff: 0.009999999999999998
          kl: 0.007648460003856725
          policy_loss: -0.15459945239126682
          total_loss: -0.16573272819320362
          vf_explained_var: -0.6591548323631287
          vf_loss: 0.0011479204831024012
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,12,173.2,12000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-09-20_09-51-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 13
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3304601073265077
          entropy_coeff: 0.009999999999999998
          kl: 0.008105187429128548
          policy_loss: -0.14746565032336448
          total_loss: -0.1583812117576599
          vf_explained_var: -0.17959171533584595
          vf_loss: 0.0015785192103875388
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,13,183.552,13000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-09-20_09-51-11
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 14
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3100615488158331
          entropy_coeff: 0.009999999999999998
          kl: 0.00881421043609943
          policy_loss: -0.1216636001235909
          total_loss: -0.13288540807035235
          vf_explained_var: -0.9628700017929077
          vf_loss: 0.000997386726602498
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,14,193.885,14000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-09-20_09-51-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 15
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2972268144289651
          entropy_coeff: 0.009999999999999998
          kl: 0.007344561188831338
          policy_loss: -0.10320278315080536
          total_loss: -0.11443943857318825
          vf_explained_var: -1.0
          vf_loss: 0.0010011569525684334
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  node_ip: 192.168.1.100
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,15,204.135,15000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-09-20_09-51-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 16
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3224179718229505
          entropy_coeff: 0.009999999999999998
          kl: 0.0073289659866219775
          policy_loss: -0.11039118203851912
          total_loss: -0.12232004660699103
          vf_explained_var: -1.0
          vf_loss: 0.0005624193791946811
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,16,214.405,16000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-09-20_09-51-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 17
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2379758384492663
          entropy_coeff: 0.009999999999999998
          kl: 0.018535920275143968
          policy_loss: -0.054843286797404286
          total_loss: -0.06397066466096375
          vf_explained_var: -1.0
          vf_loss: 0.0013987879213851152
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore: 17
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,17,224.653,17000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-09-20_09-51-52
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 18
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3695458081033496
          entropy_coeff: 0.009999999999999998
          kl: 0.014309020405686202
          policy_loss: -0.11366067880557643
          total_loss: -0.12525681315196885
          vf_explained_var: -0.7251760959625244
          vf_loss: 0.0006684204256291398
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,18,234.901,18000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-09-20_09-52-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 19
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4799692114194234
          entropy_coeff: 0.009999999999999998
          kl: 0.04336783929839088
          policy_loss: -0.013127398449513647
          total_loss: -0.02151659114493264
          vf_explained_var: -0.6003594398498535
          vf_loss: 0.0020737138017365296
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,19,245.239,19000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-09-20_09-52-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 20
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.6129278169737922
          entropy_coeff: 0.009999999999999998
          kl: 0.011256609710478097
          policy_loss: -0.014552241341314382
          total_loss: -0.027912587672472
          vf_explained_var: -0.8331255316734314
          vf_loss: 0.0010804388284062346
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,20,255.561,20000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-09-20_09-52-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 21
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.6792548060417176
          entropy_coeff: 0.009999999999999998
          kl: 0.019762085718053966
          policy_loss: 0.10429486487474707
          total_loss: 0.09195923660364416
          vf_explained_var: -0.8810697793960571
          vf_loss: 0.0014926063026198082
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21
  node_ip: 192.168.1.100
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,21,265.799,21000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-09-20_09-52-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 22
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.7282009522120159
          entropy_coeff: 0.009999999999999998
          kl: 0.0097559818691694
          policy_loss: -0.07785210692220264
          total_loss: -0.09244877863675356
          vf_explained_var: -1.0
          vf_loss: 0.00122194182250597
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  node_ip: 192.168.1.100
  num_healthy_workers: 1
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,22,276.031,22000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-09-20_09-52-44
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.558189825216929
          entropy_coeff: 0.009999999999999998
          kl: 0.02685017134655191
          policy_loss: -0.07684699048598607
          total_loss: -0.08119069147441123
          vf_explained_var: 0.11566727608442307
          vf_loss: 0.007210669889011317
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  node_ip: 192.168.1.100
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,23,286.5,23000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-09-20_09-52-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 24
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1652720285786522
          entropy_coeff: 0.009999999999999998
          kl: 0.006185225843367764
          policy_loss: -0.020026194718148975
          total_loss: -0.026667617426978218
          vf_explained_var: 0.2713870406150818
          vf_loss: 0.003619621383647124
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,24,297.003,24000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-09-20_09-53-04
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 25
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1434491879410213
          entropy_coeff: 0.009999999999999998
          kl: 0.018276799798108218
          policy_loss: -0.16050905519061617
          total_loss: -0.16571457493636343
          vf_explained_var: -0.43983814120292664
          vf_loss: 0.0021166889475555057
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,25,307.301,25000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-09-20_09-53-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 26
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.558248237768809
          entropy_coeff: 0.009999999999999998
          kl: 0.008950635552895865
          policy_loss: -0.12198018175032403
          total_loss: -0.13236192560030355
          vf_explained_var: -0.09826100617647171
          vf_loss: 0.0031868456203180055
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,26,317.525,26000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-09-20_09-53-25
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 27
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8574481328328452
          entropy_coeff: 0.009999999999999998
          kl: 0.019499372812585097
          policy_loss: -0.04181600643528832
          total_loss: -0.05343319045172797
          vf_explained_var: -0.47997915744781494
          vf_loss: 0.0025699409720901815
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,27,327.808,27000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-09-20_09-53-36
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8990823533799914
          entropy_coeff: 0.009999999999999998
          kl: 0.013380723162871967
          policy_loss: -0.11116128423147731
          total_loss: -0.12414450504713588
          vf_explained_var: -0.39647725224494934
          vf_loss: 0.002996941313095805
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,28,338.293,28000,0,0,0,1000


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-09-20_09-53-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 29
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7276380817095438
          entropy_coeff: 0.009999999999999998
          kl: 0.006425732589198368
          policy_loss: -0.1215537486390935
          total_loss: -0.13505873940885066
          vf_explained_var: -0.9283248782157898
          vf_loss: 0.0023255990190793655
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,29,348.72,29000,0,0,0,1000




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-09-20_09-54-15
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 30
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8726647324032253
          entropy_coeff: 0.009999999999999998
          kl: 0.012440836422607592
          policy_loss: -0.1212104214148389
          total_loss: -0.13203075886186627
          vf_explained_var: -0.855684757232666
          vf_loss: 0.005107120186504391
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,30,377.972,30000,0,0,0,996.1


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-09-20_09-54-26
  done: false
  episode_len_mean: 996.2258064516129
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 31
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4500676737891303
          entropy_coeff: 0.009999999999999998
          kl: 0.010757307374219715
          policy_loss: -0.12722025006595586
          total_loss: -0.13363154840966066
          vf_explained_var: 0.1989409178495407
          vf_loss: 0.005668982360253317
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore: 31
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,31,388.886,31000,0,0,0,996.226


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-09-20_09-54-37
  done: false
  episode_len_mean: 996.34375
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 32
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6833488676283095
          entropy_coeff: 0.009999999999999998
          kl: 0.010310340574552839
          policy_loss: -0.07916929523150126
          total_loss: -0.08943000170919631
          vf_explained_var: -0.30081331729888916
          vf_loss: 0.004252955030339459
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,32,399.66,32000,0,0,0,996.344


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-09-20_09-54-48
  done: false
  episode_len_mean: 996.4545454545455
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 33
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.088177247842153
          entropy_coeff: 0.009999999999999998
          kl: 0.012264918436711427
          policy_loss: -0.05935015686684185
          total_loss: -0.07388836507582003
          vf_explained_var: -0.7646636366844177
          vf_loss: 0.0035839594732452597
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_restore: 33
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,33,410.606,33000,0,0,0,996.455


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-09-20_09-54-59
  done: false
  episode_len_mean: 996.5588235294117
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 34
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.651310067706638
          entropy_coeff: 0.009999999999999998
          kl: 0.007644685650080716
          policy_loss: -0.09203054126765993
          total_loss: -0.10355404168367385
          vf_explained_var: 0.33500999212265015
          vf_loss: 0.0032695420319214462
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 34
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,34,421.215,34000,0,0,0,996.559


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-09-20_09-55-09
  done: false
  episode_len_mean: 996.6571428571428
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 35
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5658040894402399
          entropy_coeff: 0.009999999999999998
          kl: 0.006939078440723634
          policy_loss: -0.057929689519935185
          total_loss: -0.07066390038364463
          vf_explained_var: -0.9416953325271606
          vf_loss: 0.001362538119752167
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_restore: 35
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,35,431.876,35000,0,0,0,996.657


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-09-20_09-55-20
  done: false
  episode_len_mean: 996.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 36
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8291881216896906
          entropy_coeff: 0.009999999999999998
          kl: 0.01162053367630745
          policy_loss: -0.06804240312841203
          total_loss: -0.08133806517968575
          vf_explained_var: -0.9954347610473633
          vf_loss: 0.0023815969664913914
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,36,442.213,36000,0,0,0,996.75


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-09-20_09-55-30
  done: false
  episode_len_mean: 996.8378378378378
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 37
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7358187556266784
          entropy_coeff: 0.009999999999999998
          kl: 0.017256798903585346
          policy_loss: 0.0011323141554991404
          total_loss: -0.011267151228255695
          vf_explained_var: -0.22925998270511627
          vf_loss: 0.0010759391019948654
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 37
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,37,452.558,37000,0,0,0,996.838


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-09-20_09-55-41
  done: false
  episode_len_mean: 996.921052631579
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 38
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5386323942078484
          entropy_coeff: 0.009999999999999998
          kl: 0.01655774851273893
          policy_loss: 0.011972203022903867
          total_loss: 0.0023925891353024377
          vf_explained_var: -0.9611326456069946
          vf_loss: 0.0020812161137453386
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_restore: 38
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,38,463.264,38000,0,0,0,996.921


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-09-20_09-55-51
  done: false
  episode_len_mean: 997.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 39
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3510720100667741
          entropy_coeff: 0.009999999999999998
          kl: 0.010692685461348637
          policy_loss: -0.1342867899272177
          total_loss: -0.14313185132212108
          vf_explained_var: -0.43866315484046936
          vf_loss: 0.0022598041758303427
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,39,473.965,39000,0,0,0,997


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-09-20_09-56-02
  done: false
  episode_len_mean: 997.075
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 40
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8190964539845784
          entropy_coeff: 0.009999999999999998
          kl: 0.01126062118919382
          policy_loss: -0.1528777528968122
          total_loss: -0.16682377929488817
          vf_explained_var: -0.9992848634719849
          vf_loss: 0.0017112986870213515
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,40,484.634,40000,0,0,0,997.075


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-09-20_09-56-13
  done: false
  episode_len_mean: 997.1463414634146
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 41
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.493008542060852
          entropy_coeff: 0.009999999999999998
          kl: 0.01071625280037512
          policy_loss: -0.11689675086074405
          total_loss: -0.12698317699962192
          vf_explained_var: -0.6311197280883789
          vf_loss: 0.0024325005425554183
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,41,495.389,41000,0,0,0,997.146


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-09-20_09-56-23
  done: false
  episode_len_mean: 997.2142857142857
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 42
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6632765942149692
          entropy_coeff: 0.009999999999999998
          kl: 0.01118372453921926
          policy_loss: -0.08368489423559772
          total_loss: -0.09526577707793978
          vf_explained_var: -0.13871583342552185
          vf_loss: 0.0025355432486523772
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,42,505.846,42000,0,0,0,997.214


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-09-20_09-56-34
  done: false
  episode_len_mean: 997.2790697674419
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 43
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5611995140711465
          entropy_coeff: 0.009999999999999998
          kl: 0.00844206152735984
          policy_loss: -0.15036085955798625
          total_loss: -0.16230771210458544
          vf_explained_var: -0.3387606739997864
          vf_loss: 0.0017656793578579607
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,43,516.583,43000,0,0,0,997.279


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-09-20_09-56-45
  done: false
  episode_len_mean: 997.3409090909091
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 44
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9088896407021416
          entropy_coeff: 0.009999999999999998
          kl: 0.0088023972533425
          policy_loss: -0.09697954104178481
          total_loss: -0.1128353880925311
          vf_explained_var: -0.6585257053375244
          vf_loss: 0.0012525076466974698
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 44
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,44,527.298,44000,0,0,0,997.341


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-09-20_09-56-56
  done: false
  episode_len_mean: 997.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 45
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1412836644384596
          entropy_coeff: 0.009999999999999998
          kl: 0.02195658371301747
          policy_loss: -0.08099097307357524
          total_loss: -0.08023439678880903
          vf_explained_var: 0.17112188041210175
          vf_loss: 0.007229181495495141
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,45,538.006,45000,0,0,0,997.4


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-09-20_09-57-06
  done: false
  episode_len_mean: 997.4565217391304
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 46
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4814237674077353
          entropy_coeff: 0.009999999999999998
          kl: 0.009000798484940929
          policy_loss: -0.04398535564541817
          total_loss: -0.05182894219954808
          vf_explained_var: -0.39603081345558167
          vf_loss: 0.003932884417331984
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 46
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,46,548.682,46000,0,0,0,997.457


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-09-20_09-57-17
  done: false
  episode_len_mean: 997.5106382978723
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 47
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.272872503598531
          entropy_coeff: 0.009999999999999998
          kl: 0.011289457521159108
          policy_loss: -0.1954679869943195
          total_loss: -0.2006102225018872
          vf_explained_var: -0.3827425241470337
          vf_loss: 0.0037762959791709564
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,47,559.282,47000,0,0,0,997.511


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-09-20_09-57-28
  done: false
  episode_len_mean: 997.5625
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 48
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6491299404038324
          entropy_coeff: 0.009999999999999998
          kl: 0.010271933615885784
          policy_loss: -0.07210809071030881
          total_loss: -0.08355754009551472
          vf_explained_var: -1.0
          vf_loss: 0.001575072305665041
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
  node_ip: 192.168.1.100
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,48,570.049,48000,0,0,0,997.562


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-09-20_09-57-38
  done: false
  episode_len_mean: 997.6122448979592
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 49
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.0717610822783576
          entropy_coeff: 0.009999999999999998
          kl: 0.00771190025835406
          policy_loss: -0.010155696877174908
          total_loss: -0.0170777741405699
          vf_explained_var: -0.4312187731266022
          vf_loss: 0.0011927659487506995
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,49,580.651,49000,0,0,0,997.612


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-09-20_09-57-49
  done: false
  episode_len_mean: 997.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 50
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1692486021253798
          entropy_coeff: 0.009999999999999998
          kl: 0.006320354063129941
          policy_loss: -0.11434786450117826
          total_loss: -0.123059374673499
          vf_explained_var: -0.2909744381904602
          vf_loss: 0.0008478555805696589
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,50,591.234,50000,0,0,0,997.66


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-09-20_09-58-00
  done: false
  episode_len_mean: 997.7058823529412
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 51
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5684882071283128
          entropy_coeff: 0.009999999999999998
          kl: 0.014190776739632425
          policy_loss: -0.057623132835659716
          total_loss: -0.06498013658242094
          vf_explained_var: -0.35665592551231384
          vf_loss: 0.003538490979311367
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,51,601.899,51000,0,0,0,997.706


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-09-20_09-58-10
  done: false
  episode_len_mean: 997.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 52
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4207330968644885
          entropy_coeff: 0.009999999999999998
          kl: 0.008248601070263935
          policy_loss: -0.12317343768146304
          total_loss: -0.13234148286283015
          vf_explained_var: -0.9148505330085754
          vf_loss: 0.002255381479497171
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,52,612.645,52000,0,0,0,997.75


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-09-20_09-58-21
  done: false
  episode_len_mean: 997.7924528301887
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 53
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2525714092784457
          entropy_coeff: 0.009999999999999998
          kl: 0.009055521230830976
          policy_loss: -0.08268224365181392
          total_loss: -0.08806738216016027
          vf_explained_var: -0.8152797222137451
          vf_loss: 0.004084336997604825
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_restore: 53
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,53,623.585,53000,0,0,0,997.792


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-09-20_09-58-32
  done: false
  episode_len_mean: 997.8333333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 54
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2106985237863328
          entropy_coeff: 0.009999999999999998
          kl: 0.007471635970953283
          policy_loss: -0.027458333058489694
          total_loss: -0.0360347958902518
          vf_explained_var: -0.9381418824195862
          vf_loss: 0.0010088448601891288
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 54
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,54,634.104,54000,0,0,0,997.833


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-09-20_09-58-43
  done: false
  episode_len_mean: 997.8727272727273
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 55
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1929300122790867
          entropy_coeff: 0.009999999999999998
          kl: 0.009844739443192117
          policy_loss: 0.011326003074645995
          total_loss: 0.004179682417048349
          vf_explained_var: -0.7260732650756836
          vf_loss: 0.0014603788375906232
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 55
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,55,644.708,55000,0,0,0,997.873


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-09-20_09-58-54
  done: false
  episode_len_mean: 997.9107142857143
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5042117264535693
          entropy_coeff: 0.009999999999999998
          kl: 0.012252818901627431
          policy_loss: -0.09062863199247254
          total_loss: -0.09957182697123951
          vf_explained_var: -0.5400875806808472
          vf_loss: 0.0019635961481576994
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_restore: 56
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,56,655.63,56000,0,0,0,997.911


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-09-20_09-59-04
  done: false
  episode_len_mean: 997.9473684210526
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 57
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7314734631114537
          entropy_coeff: 0.009999999999999998
          kl: 0.012278510374557901
          policy_loss: 0.041452794935968186
          total_loss: 0.029595153364870282
          vf_explained_var: 0.0036270353011786938
          vf_loss: 0.0013130915040771166
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 57
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,57,666.518,57000,0,0,0,997.947


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-09-20_09-59-15
  done: false
  episode_len_mean: 997.9827586206897
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 58
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7211189932293363
          entropy_coeff: 0.009999999999999998
          kl: 0.007882407445900637
          policy_loss: -0.038029252199663056
          total_loss: -0.051425180832544964
          vf_explained_var: -0.44550010561943054
          vf_loss: 0.0011549471136984518
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,58,677.202,58000,0,0,0,997.983


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-09-20_09-59-26
  done: false
  episode_len_mean: 998.0169491525423
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 59
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8325586239496867
          entropy_coeff: 0.009999999999999998
          kl: 0.012730181936953135
          policy_loss: -0.12031067949202326
          total_loss: -0.13222253322601318
          vf_explained_var: 0.17852702736854553
          vf_loss: 0.0021172919077798724
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,59,688.024,59000,0,0,0,998.017




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-09-20_09-59-54
  done: false
  episode_len_mean: 995.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 60
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7515101313591004
          entropy_coeff: 0.009999999999999998
          kl: 0.009417216621032775
          policy_loss: 0.0834349435236719
          total_loss: 0.07107350511683358
          vf_explained_var: -0.10774092376232147
          vf_loss: 0.001975354368591474
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,60,716.447,60000,0,0,0,995.6


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-09-20_10-00-07
  done: false
  episode_len_mean: 995.672131147541
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0048905849456786
          entropy_coeff: 0.009999999999999998
          kl: 0.019072052863035753
          policy_loss: 0.11928532334665458
          total_loss: 0.10922900508675311
          vf_explained_var: -0.12283191829919815
          vf_loss: 0.0035557697346020076
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,61,729.059,61000,0,0,0,995.672


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-09-20_10-00-18
  done: false
  episode_len_mean: 995.741935483871
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 62
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0557602961858112
          entropy_coeff: 0.009999999999999998
          kl: 0.013399730046987043
          policy_loss: 0.12576284425126183
          total_loss: 0.11139341071248055
          vf_explained_var: -0.3330909013748169
          vf_loss: 0.0016657595018235345
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_restore: 62
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,62,740.052,62000,0,0,0,995.742


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-09-20_10-00-29
  done: false
  episode_len_mean: 995.8095238095239
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 63
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4368997097015381
          entropy_coeff: 0.009999999999999998
          kl: 0.008463435295373885
          policy_loss: 0.007578576770093706
          total_loss: -0.0024039429095056323
          vf_explained_var: 0.06331554800271988
          vf_loss: 0.0015300670373512226
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 63
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,63,750.571,63000,0,0,0,995.81


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-09-20_10-00-40
  done: false
  episode_len_mean: 995.875
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 64
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.966723820898268
          entropy_coeff: 0.009999999999999998
          kl: 0.009756126852642153
          policy_loss: -0.10221681470672289
          total_loss: -0.11713004526164797
          vf_explained_var: -0.44134774804115295
          vf_loss: 0.0014613140419694698
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,64,761.445,64000,0,0,0,995.875


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-09-20_10-00-50
  done: false
  episode_len_mean: 995.9384615384615
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 65
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.796142570177714
          entropy_coeff: 0.009999999999999998
          kl: 0.011513616245660864
          policy_loss: 0.014375921835501989
          total_loss: 0.0020699054209722414
          vf_explained_var: -0.14572401344776154
          vf_loss: 0.0017695629050851697
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_restore: 65
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,65,772.074,65000,0,0,0,995.938


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-09-20_10-01-01
  done: false
  episode_len_mean: 996.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 66
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7610543012619018
          entropy_coeff: 0.009999999999999998
          kl: 0.009355671722882757
          policy_loss: -0.031036658874816363
          total_loss: -0.04401159170601103
          vf_explained_var: -0.3644428253173828
          vf_loss: 0.0014780717610847205
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,66,782.712,66000,0,0,0,996


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-09-20_10-01-11
  done: false
  episode_len_mean: 996.0597014925373
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 67
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6602005726761289
          entropy_coeff: 0.009999999999999998
          kl: 0.01059805362439966
          policy_loss: -0.02388369043668111
          total_loss: -0.031368890818622375
          vf_explained_var: 0.2799522876739502
          vf_loss: 0.0055399626040727725
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_since_restore: 67
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,67,793.099,67000,0,0,0,996.06


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-09-20_10-01-22
  done: false
  episode_len_mean: 996.1176470588235
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 68
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8596374697155422
          entropy_coeff: 0.009999999999999998
          kl: 0.011344596041654607
          policy_loss: -0.016750595801406438
          total_loss: -0.029558310657739638
          vf_explained_var: -0.5123997330665588
          vf_loss: 0.0019598563531568894
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_since_restore: 68
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,68,803.803,68000,0,0,0,996.118


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-09-20_10-01-33
  done: false
  episode_len_mean: 996.1739130434783
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 69
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5600185645951166
          entropy_coeff: 0.009999999999999998
          kl: 0.011602718153156744
          policy_loss: -0.035578123148944645
          total_loss: -0.0447843697335985
          vf_explained_var: 0.00827204342931509
          vf_loss: 0.002478020189381722
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_restore: 69
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,69,814.472,69000,0,0,0,996.174


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-09-20_10-01-44
  done: false
  episode_len_mean: 996.2285714285714
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 70
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1141416721873814
          entropy_coeff: 0.009999999999999998
          kl: 0.010111412557520122
          policy_loss: -0.10431660144693322
          total_loss: -0.12122874421377977
          vf_explained_var: -0.845201849937439
          vf_loss: 0.0008166697804376276
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 70
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,70,825.586,70000,0,0,0,996.229


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-09-20_10-01-55
  done: false
  episode_len_mean: 996.2816901408451
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 71
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.956453635957506
          entropy_coeff: 0.009999999999999998
          kl: 0.014880328462822707
          policy_loss: -0.0327380095091131
          total_loss: -0.04505559154268768
          vf_explained_var: -0.8026089072227478
          vf_loss: 0.0022248443681746723
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_restore: 71
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,71,836.33,71000,0,0,0,996.282


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-09-20_10-02-05
  done: false
  episode_len_mean: 996.3333333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 72
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3199013498094347
          entropy_coeff: 0.009999999999999998
          kl: 0.006069360336309712
          policy_loss: -0.048854100538624656
          total_loss: -0.05872666835784912
          vf_explained_var: -0.3132691979408264
          vf_loss: 0.0012780370119596935
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_restore: 72
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,72,846.943,72000,0,0,0,996.333


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-09-20_10-02-16
  done: false
  episode_len_mean: 996.3835616438356
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 73
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0474622805913287
          entropy_coeff: 0.009999999999999998
          kl: 0.013592834084764496
          policy_loss: -0.15053810953266092
          total_loss: -0.16426462564203476
          vf_explained_var: 0.13212865591049194
          vf_loss: 0.002160522281580294
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_restore: 73
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,73,858.012,73000,0,0,0,996.384


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-09-20_10-02-27
  done: false
  episode_len_mean: 996.4324324324324
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 74
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.745006643401252
          entropy_coeff: 0.009999999999999998
          kl: 0.010161105698174798
          policy_loss: -0.034267088439729476
          total_loss: -0.04633745044055912
          vf_explained_var: -0.6941686272621155
          vf_loss: 0.0019503311241149074
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,74,868.85,74000,0,0,0,996.432


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-09-20_10-02-38
  done: false
  episode_len_mean: 996.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 75
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5399217486381531
          entropy_coeff: 0.009999999999999998
          kl: 0.0119964276988946
          policy_loss: -0.01891687342690097
          total_loss: -0.029223648769160112
          vf_explained_var: -0.8177337050437927
          vf_loss: 0.0010436481065375523
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_restore: 75
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,75,879.377,75000,0,0,0,996.48


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-09-20_10-02-49
  done: false
  episode_len_mean: 996.5263157894736
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 76
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.006657995118035
          entropy_coeff: 0.009999999999999998
          kl: 0.011085671820336805
          policy_loss: -0.12964454458819497
          total_loss: -0.14445659512033066
          vf_explained_var: 0.14642255008220673
          vf_loss: 0.0015131151575284699
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_restore: 76
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,76,890.435,76000,0,0,0,996.526


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-09-20_10-02-59
  done: false
  episode_len_mean: 996.5714285714286
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 77
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.648264029290941
          entropy_coeff: 0.009999999999999998
          kl: 0.011490203711383708
          policy_loss: -0.040572264873319205
          total_loss: -0.05222460851073265
          vf_explained_var: -0.5701891779899597
          vf_loss: 0.000952353332993678
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_restore: 77
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,77,901.049,77000,0,0,0,996.571


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-09-20_10-03-10
  done: false
  episode_len_mean: 996.6153846153846
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 78
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5817711220847235
          entropy_coeff: 0.009999999999999998
          kl: 0.007225866560374743
          policy_loss: 0.0013294087515936957
          total_loss: -0.011312809669309191
          vf_explained_var: -0.7538763284683228
          vf_loss: 0.0007367587632163325
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since_restore: 78
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,78,911.687,78000,0,0,0,996.615


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-09-20_10-03-21
  done: false
  episode_len_mean: 996.6582278481013
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 79
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5734075890647041
          entropy_coeff: 0.009999999999999998
          kl: 0.011861822986915785
          policy_loss: 0.008032419615321688
          total_loss: -0.0031187715629736584
          vf_explained_var: -0.47761133313179016
          vf_loss: 0.0005795217866155629
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_restore: 79
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,79,922.379,79000,0,0,0,996.658


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-09-20_10-03-31
  done: false
  episode_len_mean: 996.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 80
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.0740519205729167
          entropy_coeff: 0.009999999999999998
          kl: 0.004747600712632375
          policy_loss: 0.005381399020552635
          total_loss: -0.0027157407874862354
          vf_explained_var: -0.61545729637146
          vf_loss: 0.0010410635819425806
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,80,933.029,80000,0,0,0,996.7


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-09-20_10-03-42
  done: false
  episode_len_mean: 996.7407407407408
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 81
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.802864678700765
          entropy_coeff: 0.009999999999999998
          kl: 0.010602383718361155
          policy_loss: 0.021347948991590076
          total_loss: 0.0059313370535771055
          vf_explained_var: -0.8866456747055054
          vf_loss: 0.0008228833806545784
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_restore: 81
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,81,943.748,81000,0,0,0,996.741


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-09-20_10-03-53
  done: false
  episode_len_mean: 996.780487804878
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 82
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.871284956402249
          entropy_coeff: 0.009999999999999998
          kl: 0.01447422682447789
          policy_loss: 0.02866053059697151
          total_loss: 0.012935171524683634
          vf_explained_var: -0.5090610980987549
          vf_loss: 0.0005449645297226703
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_restore: 82
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,82,954.383,82000,0,0,0,996.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-09-20_10-04-04
  done: false
  episode_len_mean: 996.8192771084338
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 83
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7855614238315158
          entropy_coeff: 0.009999999999999998
          kl: 0.011270293292614319
          policy_loss: 0.03990416203935941
          total_loss: 0.0248137762149175
          vf_explained_var: -0.313468337059021
          vf_loss: 0.0008633664615141849
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore: 83
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,83,965.314,83000,0,0,0,996.819


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-09-20_10-04-14
  done: false
  episode_len_mean: 996.8571428571429
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 84
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6633292661772834
          entropy_coeff: 0.009999999999999998
          kl: 0.012555273884961337
          policy_loss: 0.08176937699317932
          total_loss: 0.06810723352763388
          vf_explained_var: -0.7121371626853943
          vf_loss: 0.000852447669280486
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore: 84
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,84,975.593,84000,0,0,0,996.857


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-09-20_10-04-25
  done: false
  episode_len_mean: 996.8941176470588
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 85
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7210422052277459
          entropy_coeff: 0.009999999999999998
          kl: 0.015746788644799874
          policy_loss: 0.0058237951248884205
          total_loss: -0.007885645495520698
          vf_explained_var: -0.7286867499351501
          vf_loss: 0.0008437109341482735
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_restore: 85
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,85,986.644,85000,0,0,0,996.894


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-09-20_10-04-36
  done: false
  episode_len_mean: 996.9302325581396
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 86
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8338145428233676
          entropy_coeff: 0.009999999999999998
          kl: 0.012358811109130032
          policy_loss: -0.010352836839026875
          total_loss: -0.026151328616672093
          vf_explained_var: -0.800819993019104
          vf_loss: 0.0004541047358846602
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_restore: 86
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,86,997.739,86000,0,0,0,996.93


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-09-20_10-04-47
  done: false
  episode_len_mean: 996.9655172413793
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 87
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.637742993566725
          entropy_coeff: 0.009999999999999998
          kl: 0.009418358306510024
          policy_loss: 0.017414523950881427
          total_loss: 0.0033224558354251915
          vf_explained_var: -0.5822107195854187
          vf_loss: 0.0006960144146837087
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_restore: 87
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,87,1008.83,87000,0,0,0,996.966


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-09-20_10-04-59
  done: false
  episode_len_mean: 997.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 88
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6999667432573107
          entropy_coeff: 0.009999999999999998
          kl: 0.014146510506295269
          policy_loss: -0.029673678138189847
          total_loss: -0.04369761780318287
          vf_explained_var: -0.9999801516532898
          vf_loss: 0.0005885033064664134
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,88,1019.91,88000,0,0,0,997


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-09-20_10-05-10
  done: false
  episode_len_mean: 997.0337078651686
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 89
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.055920895602968
          entropy_coeff: 0.009999999999999998
          kl: 0.03708825004243614
          policy_loss: -0.13196271024644374
          total_loss: -0.1332754297595885
          vf_explained_var: 0.08467429131269455
          vf_loss: 0.0029878449031255313
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_restore: 89
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,89,1030.98,89000,0,0,0,997.034




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-09-20_10-05-38
  done: false
  episode_len_mean: 995.5444444444445
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 90
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5249440736240811
          entropy_coeff: 0.009999999999999998
          kl: 0.012692299940261876
          policy_loss: 0.008837371236748166
          total_loss: -0.0006930389338069492
          vf_explained_var: -0.3671059012413025
          vf_loss: 0.0025062903157150786
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 90
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,90,1059.43,90000,0,0,0,995.544


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-09-20_10-05-50
  done: false
  episode_len_mean: 995.5934065934066
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 91
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4278546465767754
          entropy_coeff: 0.009999999999999998
          kl: 0.015926932414305463
          policy_loss: -0.10152752569152249
          total_loss: -0.11040433678362105
          vf_explained_var: -0.27411922812461853
          vf_loss: 0.0013702299132192922
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_restore: 91
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,91,1071.14,91000,0,0,0,995.593


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-09-20_10-06-00
  done: false
  episode_len_mean: 995.6413043478261
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 92
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4737313879860772
          entropy_coeff: 0.009999999999999998
          kl: 0.014510054089447048
          policy_loss: -0.0008005213406350877
          total_loss: -0.011278209421369765
          vf_explained_var: -1.0
          vf_loss: 0.0005867677954180787
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_since_restore: 92
  node_ip: 192.168.1.100
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,92,1081.1,92000,0,0,0,995.641


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-09-20_10-06-10
  done: false
  episode_len_mean: 995.6881720430108
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 93
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2313952856593662
          entropy_coeff: 0.009999999999999998
          kl: 0.0040323658829963215
          policy_loss: 0.03410986504620976
          total_loss: 0.023185092252161768
          vf_explained_var: -0.7881138920783997
          vf_loss: 0.00036848666374377593
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_restore: 93
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,93,1091.01,93000,0,0,0,995.688


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-09-20_10-06-20
  done: false
  episode_len_mean: 995.7340425531914
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 94
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3582000388039484
          entropy_coeff: 0.009999999999999998
          kl: 0.007400783599944896
          policy_loss: 0.00591578007572227
          total_loss: -0.005813854353295432
          vf_explained_var: -0.8675096035003662
          vf_loss: 0.0009157065298899802
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 94
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,94,1101.28,94000,0,0,0,995.734


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-09-20_10-06-31
  done: false
  episode_len_mean: 995.7789473684211
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 95
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.4851659999953375
          entropy_coeff: 0.009999999999999998
          kl: 0.012915107168606157
          policy_loss: -0.02315259901806712
          total_loss: -0.03562256990828448
          vf_explained_var: -0.6930463910102844
          vf_loss: 0.0007471215290327867
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_restore: 95
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,95,1111.81,95000,0,0,0,995.779


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-09-20_10-06-41
  done: false
  episode_len_mean: 995.8229166666666
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 96
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2233661519156562
          entropy_coeff: 0.009999999999999998
          kl: 0.012250307000302128
          policy_loss: 0.005738155957725313
          total_loss: -0.00379505240254932
          vf_explained_var: -0.9102221131324768
          vf_loss: 0.0011500218789377767
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore: 96
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,96,1122.27,96000,0,0,0,995.823


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-09-20_10-06-51
  done: false
  episode_len_mean: 995.8659793814433
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 97
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.647290711932712
          entropy_coeff: 0.009999999999999998
          kl: 0.025890152762918936
          policy_loss: -0.06111644140134255
          total_loss: -0.07249728383289443
          vf_explained_var: -0.5092973709106445
          vf_loss: 0.001815341927188759
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_restore: 97
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,97,1132.63,97000,0,0,0,995.866


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-09-20_10-07-02
  done: false
  episode_len_mean: 995.9081632653061
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 98
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5358658207787408
          entropy_coeff: 0.009999999999999998
          kl: 0.02330661193643727
          policy_loss: 0.016847639448112912
          total_loss: 0.007303644054465824
          vf_explained_var: -0.6382951736450195
          vf_loss: 0.0013900493614427331
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_restore: 98
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,98,1142.7,98000,0,0,0,995.908


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-09-20_10-07-11
  done: false
  episode_len_mean: 995.9494949494949
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 99
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8475914239883422
          entropy_coeff: 0.009999999999999998
          kl: 0.01733684130771383
          policy_loss: -0.10382630736049679
          total_loss: -0.1134062730293307
          vf_explained_var: -0.03402813896536827
          vf_loss: 0.003959013841166679
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_restore: 99
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,99,1152.54,99000,0,0,0,995.949


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-09-20_10-07-22
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 100
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6626736137602065
          entropy_coeff: 0.009999999999999998
          kl: 0.011528256122660787
          policy_loss: -0.05002714287903574
          total_loss: -0.06192832158671485
          vf_explained_var: -0.4401041567325592
          vf_loss: 0.0014427024263164235
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,100,1162.91,100000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-09-20_10-07-32
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 101
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5615793188412985
          entropy_coeff: 0.009999999999999998
          kl: 0.007729148698227463
          policy_loss: -0.02521112639353507
          total_loss: -0.037740118806767795
          vf_explained_var: -1.0
          vf_loss: 0.000885803909235013
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,101,1173.49,101000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-09-20_10-07-43
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 102
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.639965017636617
          entropy_coeff: 0.009999999999999998
          kl: 0.012673230601339193
          policy_loss: -0.03982460124211179
          total_loss: -0.0512544303925501
          vf_explained_var: -0.23795267939567566
          vf_loss: 0.001360919308434758
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,102,1183.8,102000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-09-20_10-07-52
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 103
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7493347154723273
          entropy_coeff: 0.009999999999999998
          kl: 0.01504816379802156
          policy_loss: 0.01967264697369602
          total_loss: 0.00729214135143492
          vf_explained_var: -0.8845155239105225
          vf_loss: 0.0008276401530666691
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,103,1193.49,103000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-09-20_10-08-02
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 104
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7845824109183417
          entropy_coeff: 0.009999999999999998
          kl: 0.01436718741332628
          policy_loss: -0.05763891115784645
          total_loss: -0.06949320654902193
          vf_explained_var: -0.6137697100639343
          vf_loss: 0.001900248765014112
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,104,1203.31,104000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-09-20_10-08-12
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 105
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7568753189510768
          entropy_coeff: 0.009999999999999998
          kl: 0.009279128040414105
          policy_loss: -0.08740552105009555
          total_loss: -0.10129107928110494
          vf_explained_var: -0.5958526730537415
          vf_loss: 0.00104081969604724
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,105,1213.35,105000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-09-20_10-08-23
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 106
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7599756585227118
          entropy_coeff: 0.009999999999999998
          kl: 0.013332402341523657
          policy_loss: -0.08476591815965043
          total_loss: -0.09773766187330087
          vf_explained_var: -1.0
          vf_loss: 0.0008314040382780756
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,106,1223.91,106000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-09-20_10-08-33
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 107
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.829172013865577
          entropy_coeff: 0.009999999999999998
          kl: 0.012283799545976038
          policy_loss: 0.06862365293006102
          total_loss: 0.05634690978460842
          vf_explained_var: -0.5906594395637512
          vf_loss: 0.0025169747571150464
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,107,1234.32,107000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-09-20_10-08-44
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 108
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8973805970615811
          entropy_coeff: 0.009999999999999998
          kl: 0.008465360107655704
          policy_loss: -0.03721758425235748
          total_loss: -0.05230071594317754
          vf_explained_var: -0.10221656411886215
          vf_loss: 0.0014800314008930906
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,108,1244.54,108000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-09-20_10-08-54
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 109
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8564255926344129
          entropy_coeff: 0.009999999999999998
          kl: 0.017238816821179902
          policy_loss: 0.058320539279116526
          total_loss: 0.04815281459854709
          vf_explained_var: 0.4768558442592621
          vf_loss: 0.0034875062158486497
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,109,1254.75,109000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-09-20_10-09-04
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 110
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.921550084484948
          entropy_coeff: 0.009999999999999998
          kl: 0.013719435933428049
          policy_loss: -0.07562887788646751
          total_loss: -0.08867176295154625
          vf_explained_var: -0.1230088323354721
          vf_loss: 0.0022657906688335868
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,110,1264.71,110000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-09-20_10-09-14
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 111
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.039230063226488
          entropy_coeff: 0.009999999999999998
          kl: 0.013208286862128342
          policy_loss: -0.047241618898179794
          total_loss: -0.06152551248669624
          vf_explained_var: -0.47115975618362427
          vf_loss: 0.002347139451497545
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,111,1274.95,111000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-09-20_10-09-24
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 112
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0753041174676685
          entropy_coeff: 0.009999999999999998
          kl: 0.01307366332317613
          policy_loss: -0.08352688054243723
          total_loss: -0.0982394613739517
          vf_explained_var: -0.26778584718704224
          vf_loss: 0.0023175274642805257
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,112,1284.77,112000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-09-20_10-09-34
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 113
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1569346878263684
          entropy_coeff: 0.009999999999999998
          kl: 0.01174071692463782
          policy_loss: -0.08957970750828584
          total_loss: -0.10583217384086716
          vf_explained_var: -0.7868576645851135
          vf_loss: 0.0019735254269714155
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,113,1294.8,113000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-09-20_10-09-45
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 114
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.090310666296217
          entropy_coeff: 0.009999999999999998
          kl: 0.012381255490338737
          policy_loss: -0.1180449430934257
          total_loss: -0.13401883095502853
          vf_explained_var: -0.3954167366027832
          vf_loss: 0.001403464477819701
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,114,1305.39,114000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-09-20_10-09-55
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 115
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0944940911398993
          entropy_coeff: 0.009999999999999998
          kl: 0.010167176758180465
          policy_loss: -0.08693979494273663
          total_loss: -0.10311116522385014
          vf_explained_var: -0.9304311275482178
          vf_loss: 0.001878307382705518
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,115,1315.56,115000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-09-20_10-10-05
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 116
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1003246770964727
          entropy_coeff: 0.009999999999999998
          kl: 0.01207267765090863
          policy_loss: -0.028623906812734076
          total_loss: -0.04522075984213087
          vf_explained_var: -0.4482373595237732
          vf_loss: 0.0009685098672182195
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,116,1325.67,116000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-09-20_10-10-15
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 117
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.163239622116089
          entropy_coeff: 0.009999999999999998
          kl: 0.014402667099471205
          policy_loss: -0.06417756229639053
          total_loss: -0.07916433935364088
          vf_explained_var: -0.2546488046646118
          vf_loss: 0.0025442361165510696
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,117,1335.56,117000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-09-20_10-10-25
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 118
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.234185700946384
          entropy_coeff: 0.009999999999999998
          kl: 0.011157882156489407
          policy_loss: 0.03616954812573062
          total_loss: 0.018818356614145968
          vf_explained_var: -0.9593501687049866
          vf_loss: 0.0018132844461231595
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,118,1345.45,118000,0,0,0,995.99


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-09-20_10-10-35
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 119
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.220852178997464
          entropy_coeff: 0.009999999999999998
          kl: 0.014939703039475057
          policy_loss: -0.08807926128307979
          total_loss: -0.10478469034036
          vf_explained_var: -0.5743306279182434
          vf_loss: 0.0012487770601486167
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,119,1355.35,119000,0,0,0,995.99




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-09-20_10-11-01
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 120
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.204995244079166
          entropy_coeff: 0.009999999999999998
          kl: 0.012159428560475641
          policy_loss: -0.09263535001211697
          total_loss: -0.10949225864476628
          vf_explained_var: -0.4757828116416931
          vf_loss: 0.001730454324408331
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,120,1381.89,120000,0,0,0,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-09-20_10-11-13
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 121
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0398449354701573
          entropy_coeff: 0.009999999999999998
          kl: 0.015261283618736915
          policy_loss: 0.018025765774978532
          total_loss: 0.0029808600743611653
          vf_explained_var: -0.5013144016265869
          vf_loss: 0.0010076543788373885
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,121,1393.57,121000,0,0,0,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-09-20_10-11-23
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 122
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.142591342661116
          entropy_coeff: 0.009999999999999998
          kl: 0.014610785237965097
          policy_loss: 0.06645174125830332
          total_loss: 0.050112998651133646
          vf_explained_var: -0.89571213722229
          vf_loss: 0.0009265225771943936
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,122,1403.54,122000,0,0,0,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-09-20_10-11-33
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 123
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.071274687184228
          entropy_coeff: 0.009999999999999998
          kl: 0.01315439358902112
          policy_loss: -0.08507957112871939
          total_loss: -0.1001588961109519
          vf_explained_var: -0.6380782127380371
          vf_loss: 0.0018875042468102443
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,123,1413.49,123000,0,0,0,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-09-20_10-11-43
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 124
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9564607249365913
          entropy_coeff: 0.009999999999999998
          kl: 0.012605317570645803
          policy_loss: -0.050273012204302685
          total_loss: -0.06237065527174208
          vf_explained_var: 0.18794861435890198
          vf_loss: 0.0038773999109657275
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,124,1423.54,124000,0,0,0,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-09-20_10-11-54
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 125
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9589169436030918
          entropy_coeff: 0.009999999999999998
          kl: 0.014654057370452694
          policy_loss: -0.046131092806657156
          total_loss: -0.047830865697728264
          vf_explained_var: 0.4168159067630768
          vf_loss: 0.013716423724933218
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,125,1434.14,125000,0,0,0,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-09-20_10-12-04
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 126
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.918297451072269
          entropy_coeff: 0.009999999999999998
          kl: 0.018190640361403495
          policy_loss: 0.06270775089247359
          total_loss: 0.05309251909040742
          vf_explained_var: 0.6278002858161926
          vf_loss: 0.004387675190810114
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,126,1444.64,126000,0,0,0,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-09-20_10-12-15
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 127
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1075443347295124
          entropy_coeff: 0.009999999999999998
          kl: 0.01063816718347829
          policy_loss: -0.012346918135881424
          total_loss: -0.018233144241902564
          vf_explained_var: 0.4703611731529236
          vf_loss: 0.012159836016750584
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,127,1455.28,127000,-0.02,0,-2,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-09-20_10-12-25
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 128
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.999619542227851
          entropy_coeff: 0.009999999999999998
          kl: 0.017246855647761766
          policy_loss: -0.0458010291111552
          total_loss: -0.051677555890960826
          vf_explained_var: 0.4962727427482605
          vf_loss: 0.00920835956704751
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,128,1465.46,128000,-0.02,0,-2,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-09-20_10-12-35
  done: false
  episode_len_mean: 994.61
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 129
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.107128384378221
          entropy_coeff: 0.009999999999999998
          kl: 0.018973140607918954
          policy_loss: -0.08814303527275721
          total_loss: -0.10025665652420786
          vf_explained_var: 0.5864740014076233
          vf_loss: 0.0035547636384661825
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,129,1475.44,129000,-0.02,0,-2,994.61


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-09-20_10-12-45
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 130
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9384185274442036
          entropy_coeff: 0.009999999999999998
          kl: 0.01754838448153306
          policy_loss: -0.00046533702148331536
          total_loss: -0.011275985712806384
          vf_explained_var: 0.5911391973495483
          vf_loss: 0.0035763582033622597
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,130,1485.75,130000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-09-20_10-12-56
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 131
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.961223730776045
          entropy_coeff: 0.009999999999999998
          kl: 0.017813909334317576
          policy_loss: 0.00750369421309895
          total_loss: -0.005730564147233963
          vf_explained_var: -0.046259477734565735
          vf_loss: 0.001305192511386445
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,131,1496.02,131000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-09-20_10-13-06
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 132
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0824272248480056
          entropy_coeff: 0.009999999999999998
          kl: 0.012768057127506557
          policy_loss: -0.07288123418887456
          total_loss: -0.08694125004112721
          vf_explained_var: -0.020901784300804138
          vf_loss: 0.0031283532547402297
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,132,1506.34,132000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-09-20_10-13-16
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 133
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0266743938128156
          entropy_coeff: 0.009999999999999998
          kl: 0.016599137974392913
          policy_loss: -0.06782207807732953
          total_loss: -0.0815213835487763
          vf_explained_var: -0.3142562508583069
          vf_loss: 0.0018405737633454718
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,133,1516.56,133000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-09-20_10-13-26
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 134
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9952035413848028
          entropy_coeff: 0.009999999999999998
          kl: 0.01124789376692716
          policy_loss: -0.08632878752218352
          total_loss: -0.10149182453751564
          vf_explained_var: -0.09631216526031494
          vf_loss: 0.0015859837003517896
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,134,1526.75,134000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-09-20_10-13-37
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 135
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0197070346938237
          entropy_coeff: 0.009999999999999998
          kl: 0.008939235491094957
          policy_loss: -0.0956906565775474
          total_loss: -0.1119194186396069
          vf_explained_var: -0.5435057878494263
          vf_loss: 0.0014227220297066702
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,135,1537.06,135000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-09-20_10-13-47
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 136
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.518281598885854
          entropy_coeff: 0.009999999999999998
          kl: 0.013908289320419895
          policy_loss: -0.015581458641423119
          total_loss: -0.023220203402969573
          vf_explained_var: 0.6369300484657288
          vf_loss: 0.003583467142501225
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,136,1547.62,136000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-09-20_10-13-57
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 137
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1151990758048163
          entropy_coeff: 0.009999999999999998
          kl: 0.013082562157746046
          policy_loss: -0.15557213127613068
          total_loss: -0.1714780141909917
          vf_explained_var: 0.15885911881923676
          vf_loss: 0.0015206430605353995
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,137,1557.6,137000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-09-20_10-14-08
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 138
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.023938771088918
          entropy_coeff: 0.009999999999999998
          kl: 0.01069645590795029
          policy_loss: -0.07910092891090446
          total_loss: -0.09388232255975405
          vf_explained_var: 0.0022134457249194384
          vf_loss: 0.002412013772926811
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,138,1568,138000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-09-20_10-14-18
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 139
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.142646763059828
          entropy_coeff: 0.009999999999999998
          kl: 0.010191779241733891
          policy_loss: 0.12596582795182865
          total_loss: 0.10887968018651009
          vf_explained_var: 0.14413216710090637
          vf_loss: 0.0014380500322052588
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,139,1578.6,139000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-09-20_10-14-29
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 140
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0570518838034735
          entropy_coeff: 0.009999999999999998
          kl: 0.012128039810856429
          policy_loss: -0.018812795045475166
          total_loss: -0.03430077750235796
          vf_explained_var: -0.03364674746990204
          vf_loss: 0.0016288879502099007
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,140,1589.13,140000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-09-20_10-14-39
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 141
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9585345811314052
          entropy_coeff: 0.009999999999999998
          kl: 0.01939479334881514
          policy_loss: -0.03358491907517115
          total_loss: -0.045418596433268656
          vf_explained_var: 0.40092167258262634
          vf_loss: 0.0022287025983031426
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,141,1599.08,141000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-09-20_10-14-49
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 142
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.118538788954417
          entropy_coeff: 0.009999999999999998
          kl: 0.015229909445114072
          policy_loss: -0.04253272273474269
          total_loss: -0.055753869687517485
          vf_explained_var: 0.08356574922800064
          vf_loss: 0.00362728802073333
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,142,1609.43,142000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-09-20_10-15-00
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 143
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0512934313880073
          entropy_coeff: 0.009999999999999998
          kl: 0.016766213115103786
          policy_loss: -0.013363879463738865
          total_loss: -0.026961080957618025
          vf_explained_var: 0.2577970623970032
          vf_loss: 0.0021412935062673773
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,143,1619.83,143000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-09-20_10-15-10
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 144
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.006518816947937
          entropy_coeff: 0.009999999999999998
          kl: 0.018491346460776804
          policy_loss: -0.05266445693042543
          total_loss: -0.06422830203341114
          vf_explained_var: -0.3183142840862274
          vf_loss: 0.00323564271028671
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 144
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,144,1630.17,144000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-09-20_10-15-20
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 145
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.094816345638699
          entropy_coeff: 0.009999999999999998
          kl: 0.010580963758218095
          policy_loss: -0.07160607404592964
          total_loss: -0.08856123151878516
          vf_explained_var: 0.012523566372692585
          vf_loss: 0.0009799114839956424
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_since_restore: 145
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,145,1640.55,145000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-09-20_10-15-31
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 146
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.125182130601671
          entropy_coeff: 0.009999999999999998
          kl: 0.01321493217609841
          policy_loss: -0.12013750415709283
          total_loss: -0.1361681520111031
          vf_explained_var: -0.6297724843025208
          vf_loss: 0.001458014116764793
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_since_restore: 146
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,146,1650.9,146000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-09-20_10-15-41
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 147
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1663492229249743
          entropy_coeff: 0.009999999999999998
          kl: 0.015507453112076833
          policy_loss: -0.11980101333724127
          total_loss: -0.13502446942859225
          vf_explained_var: -0.33907070755958557
          vf_loss: 0.0020240468930246103
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_since_restore: 147
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,147,1661.19,147000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-09-20_10-15-52
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 148
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.108252353138394
          entropy_coeff: 0.009999999999999998
          kl: 0.01172945419841681
          policy_loss: -0.05852850121963355
          total_loss: -0.07480451543298032
          vf_explained_var: -1.0
          vf_loss: 0.001466364792173004
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 148
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,148,1671.74,148000,-0.02,0,-2,995.78


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-09-20_10-16-02
  done: false
  episode_len_mean: 995.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 149
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9497286505169338
          entropy_coeff: 0.009999999999999998
          kl: 0.008243039927758645
          policy_loss: 0.014381177288790544
          total_loss: -0.0022747294563386176
          vf_explained_var: -0.5901203155517578
          vf_loss: 0.0004940461984208216
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_since_restore: 149
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,149,1682.18,149000,-0.02,0,-2,995.78




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-09-20_10-16-30
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 150
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0601072669029237
          entropy_coeff: 0.009999999999999998
          kl: 0.012629532811485215
          policy_loss: -0.06891398946754634
          total_loss: -0.08479121920859647
          vf_explained_var: -0.4468943774700165
          vf_loss: 0.0011273853571765358
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 150
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,150,1709.88,150000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-09-20_10-16-43
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 151
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.984659567144182
          entropy_coeff: 0.009999999999999998
          kl: 0.011882041755736062
          policy_loss: 0.10738337544931306
          total_loss: 0.09255976097451316
          vf_explained_var: -0.512764036655426
          vf_loss: 0.0016393845728695548
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_since_restore: 151
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,151,1722.46,151000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-09-20_10-16-54
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 152
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9785224583413865
          entropy_coeff: 0.009999999999999998
          kl: 0.01097998407386667
          policy_loss: 0.018319577972094218
          total_loss: 0.003481548610660765
          vf_explained_var: 0.22416259348392487
          vf_loss: 0.0018204740332698243
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 152
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,152,1733.45,152000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-09-20_10-17-04
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 153
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0943585488531324
          entropy_coeff: 0.009999999999999998
          kl: 0.013534741278356677
          policy_loss: -0.028044678105248345
          total_loss: -0.04398969908555349
          vf_explained_var: -0.1459551453590393
          vf_loss: 0.0011443353697864545
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_since_restore: 153
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,153,1743.97,153000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-09-20_10-17-15
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 154
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0045780181884765
          entropy_coeff: 0.009999999999999998
          kl: 0.016688902885772017
          policy_loss: -0.09430966079235077
          total_loss: -0.10741137468980419
          vf_explained_var: 0.3694266378879547
          vf_loss: 0.002191641688760784
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_since_restore: 154
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,154,1754.51,154000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-09-20_10-17-25
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 155
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1136937061945598
          entropy_coeff: 0.009999999999999998
          kl: 0.009593520580445977
          policy_loss: -0.05266723177499241
          total_loss: -0.06974239481820001
          vf_explained_var: -0.5080550909042358
          vf_loss: 0.0013298679078515205
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_since_restore: 155
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,155,1765.18,155000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-09-20_10-17-36
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 156
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0457192222277323
          entropy_coeff: 0.009999999999999998
          kl: 0.013201746677134136
          policy_loss: 0.017523237152232064
          total_loss: 0.00294706995288531
          vf_explained_var: 0.24504230916500092
          vf_loss: 0.002121623217762034
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_since_restore: 156
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,156,1775.43,156000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-09-20_10-17-46
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 157
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.159570418463813
          entropy_coeff: 0.009999999999999998
          kl: 0.012907268227643638
          policy_loss: -0.1409534513950348
          total_loss: -0.15757562120755514
          vf_explained_var: 0.23879840970039368
          vf_loss: 0.0012979876203139105
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations_since_restore: 157
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,157,1785.85,157000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-09-20_10-17-57
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 158
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.092960646417406
          entropy_coeff: 0.009999999999999998
          kl: 0.013815407694522257
          policy_loss: 0.032255915821426444
          total_loss: 0.01571135947273837
          vf_explained_var: -0.9935857057571411
          vf_loss: 0.0004508972985301322
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations_since_restore: 158
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,158,1796.66,158000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-09-20_10-18-08
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 159
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.103975146346622
          entropy_coeff: 0.009999999999999998
          kl: 0.012720601196837183
          policy_loss: 0.04019200363092952
          total_loss: 0.024813906558685834
          vf_explained_var: -0.3509613573551178
          vf_loss: 0.00203926447445863
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_since_restore: 159
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,159,1807.47,159000,-0.02,0,-2,994.43


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-09-20_10-18-18
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 160
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0884365518887837
          entropy_coeff: 0.009999999999999998
          kl: 0.010840109524679498
          policy_loss: -0.10010338961664174
          total_loss: -0.11658958457410336
          vf_explained_var: -0.991135835647583
          vf_loss: 0.0013112780851467201
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 160
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,160,1818.2,160000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-09-20_10-18-29
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 161
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2288594749238757
          entropy_coeff: 0.009999999999999998
          kl: 0.009114892185139478
          policy_loss: 0.0076098176443742385
          total_loss: -0.01087192134000361
          vf_explained_var: -1.0
          vf_loss: 0.001211247861566436
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_since_restore: 161
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,161,1828.77,161000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-09-20_10-18-40
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 162
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2579626030392115
          entropy_coeff: 0.009999999999999998
          kl: 0.01793606934573412
          policy_loss: -0.14545745050741565
          total_loss: -0.16181534594959682
          vf_explained_var: -0.23274004459381104
          vf_loss: 0.0011141531583335664
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_since_restore: 162
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,162,1839.49,162000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-09-20_10-18-51
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 163
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1407726407051086
          entropy_coeff: 0.009999999999999998
          kl: 0.014194326096308331
          policy_loss: -0.019560462195012306
          total_loss: -0.03408653820450935
          vf_explained_var: -0.3019522726535797
          vf_loss: 0.0028395884896680297
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_since_restore: 163
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,163,1850.24,163000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-09-20_10-19-01
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 164
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1286831272972955
          entropy_coeff: 0.009999999999999998
          kl: 0.011526963718566355
          policy_loss: -0.043944284402661855
          total_loss: -0.06072215156422721
          vf_explained_var: -0.08772361278533936
          vf_loss: 0.001226480938364855
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_since_restore: 164
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,164,1860.8,164000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-09-20_10-19-12
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 165
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.18602155579461
          entropy_coeff: 0.009999999999999998
          kl: 0.012947394451262974
          policy_loss: -0.0787698013914956
          total_loss: -0.09506290025181241
          vf_explained_var: 0.14059647917747498
          vf_loss: 0.0018801430718869798
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterations_since_restore: 165
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,165,1871.66,165000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-09-20_10-19-23
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 166
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.106229323811001
          entropy_coeff: 0.009999999999999998
          kl: 0.017213433332182563
          policy_loss: 0.10911446306854486
          total_loss: 0.09354303150127331
          vf_explained_var: -0.5932773947715759
          vf_loss: 0.0005890651572877283
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 166
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,166,1882.47,166000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-09-20_10-19-34
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 167
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0743551426463656
          entropy_coeff: 0.009999999999999998
          kl: 0.013189757877997603
          policy_loss: 0.019229946720103423
          total_loss: 0.0032990467217233446
          vf_explained_var: -0.4754064381122589
          vf_loss: 0.001056660368016714
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations_since_restore: 167
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,167,1893.37,167000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-09-20_10-19-45
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 168
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.227729540401035
          entropy_coeff: 0.009999999999999998
          kl: 0.011425200142175226
          policy_loss: -0.04173272351423899
          total_loss: -0.05987347753511535
          vf_explained_var: -0.7710734605789185
          vf_loss: 0.0008830370905343443
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 168
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,168,1904.06,168000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-09-20_10-19-56
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 169
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1694641881518892
          entropy_coeff: 0.009999999999999998
          kl: 0.01176121069999131
          policy_loss: -0.0027824120389090645
          total_loss: -0.020316467185815177
          vf_explained_var: -0.554719865322113
          vf_loss: 0.0008113969725349711
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations_since_restore: 169
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,169,1915.05,169000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-09-20_10-20-06
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 170
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2075549337599014
          entropy_coeff: 0.009999999999999998
          kl: 0.009890217579172382
          policy_loss: -0.04122819271352556
          total_loss: -0.05981212312148677
          vf_explained_var: -0.8649802207946777
          vf_loss: 0.0006752248070875389
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 170
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,170,1925.88,170000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-09-20_10-20-17
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 171
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2497629854414196
          entropy_coeff: 0.009999999999999998
          kl: 0.009798423902025268
          policy_loss: -0.011379328618446986
          total_loss: -0.030176353620158303
          vf_explained_var: -1.0
          vf_loss: 0.0009103492840141472
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_since_restore: 171
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,171,1936.45,171000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-09-20_10-20-28
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 172
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1450917296939425
          entropy_coeff: 0.009999999999999998
          kl: 0.013431011947031715
          policy_loss: -0.048555030963487095
          total_loss: -0.06531579838030868
          vf_explained_var: -0.6846693158149719
          vf_loss: 0.0008654583203476957
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 172
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,172,1947.16,172000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-09-20_10-20-38
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 173
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2625931792789036
          entropy_coeff: 0.009999999999999998
          kl: 0.01234159862038984
          policy_loss: -0.05322128971003824
          total_loss: -0.07139152460214164
          vf_explained_var: -0.9090731143951416
          vf_loss: 0.0009412323525692854
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterations_since_restore: 173
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,173,1957.88,173000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-09-20_10-20-49
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 174
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2010250727335614
          entropy_coeff: 0.009999999999999998
          kl: 0.01717230870337768
          policy_loss: 0.030484720050460764
          total_loss: 0.014139676321711805
          vf_explained_var: -0.4226686358451843
          vf_loss: 0.0007751229814150268
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_since_restore: 174
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,174,1968.76,174000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-09-20_10-21-00
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 175
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.928760215971205
          entropy_coeff: 0.009999999999999998
          kl: 0.01644486538426724
          policy_loss: -0.08280162852671412
          total_loss: -0.0935511692530579
          vf_explained_var: 0.0693698599934578
          vf_loss: 0.0038551237566732907
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterations_since_restore: 175
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,175,1979.6,175000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-09-20_10-21-11
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 176
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.099005369345347
          entropy_coeff: 0.009999999999999998
          kl: 0.010296983524815149
          policy_loss: 0.04069431391027239
          total_loss: 0.023413066607382564
          vf_explained_var: -0.558141827583313
          vf_loss: 0.0007765798541691361
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 176
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,176,1990.52,176000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-09-20_10-21-22
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 177
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9028687159220377
          entropy_coeff: 0.009999999999999998
          kl: 0.015212006455722597
          policy_loss: -0.08038037286864387
          total_loss: -0.09377484106355244
          vf_explained_var: 0.6841613054275513
          vf_loss: 0.001302361653910743
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iterations_since_restore: 177
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,177,2001.74,177000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-09-20_10-21-34
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 178
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.851017591688368
          entropy_coeff: 0.009999999999999998
          kl: 0.010100563207811388
          policy_loss: -0.10410048473212453
          total_loss: -0.11862328184975518
          vf_explained_var: -0.35334697365760803
          vf_loss: 0.0011110836028819903
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations_since_restore: 178
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,178,2013.02,178000,-0.02,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-09-20_10-21-45
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 179
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7973589804437426
          entropy_coeff: 0.009999999999999998
          kl: 0.01578147286721937
          policy_loss: -0.07450878769159316
          total_loss: -0.07674949864546458
          vf_explained_var: 0.4351363778114319
          vf_loss: 0.01123885825476868
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iterations_since_restore: 179
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,179,2023.89,179000,-0.02,0,-2,995.9




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-09-20_10-22-13
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 180
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.035971314377255
          entropy_coeff: 0.009999999999999998
          kl: 0.013746625269142538
          policy_loss: -0.03393654310041004
          total_loss: -0.04871466995941268
          vf_explained_var: -0.34330716729164124
          vf_loss: 0.0016670200718282204
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 180
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,180,2052.11,180000,-0.02,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-09-20_10-22-25
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 181
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9297977818383112
          entropy_coeff: 0.009999999999999998
          kl: 0.00645078801165483
          policy_loss: -0.17780905086547136
          total_loss: -0.19385000550084644
          vf_explained_var: -0.03796077147126198
          vf_loss: 0.0014200593445113756
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 181
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,181,2064.65,181000,-0.02,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-09-20_10-22-36
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 182
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.106863778167301
          entropy_coeff: 0.009999999999999998
          kl: 0.00993354679435754
          policy_loss: -0.06158890053629875
          total_loss: -0.07762789730396535
          vf_explained_var: 0.12965896725654602
          vf_loss: 0.00220090925706447
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_since_restore: 182
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,182,2074.91,182000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-09-20_10-22-46
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 183
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7346187207433912
          entropy_coeff: 0.009999999999999998
          kl: 0.01691128085902523
          policy_loss: 0.06565035548475054
          total_loss: 0.05519500159555012
          vf_explained_var: 0.23796740174293518
          vf_loss: 0.0020750810388967186
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iterations_since_restore: 183
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,183,2085.11,183000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-09-20_10-22-56
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 184
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5823887573348152
          entropy_coeff: 0.009999999999999998
          kl: 0.00912328044911344
          policy_loss: -0.03659738169776069
          total_loss: -0.04379269464148416
          vf_explained_var: 0.22052106261253357
          vf_loss: 0.006030573002256763
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 184
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,184,2095.4,184000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-09-20_10-23-07
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 185
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1440253416697184
          entropy_coeff: 0.009999999999999998
          kl: 0.014853125206543026
          policy_loss: 0.05261724883069595
          total_loss: 0.037056992803182864
          vf_explained_var: -0.05988286808133125
          vf_loss: 0.001650334480089239
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iterations_since_restore: 185
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,185,2105.73,185000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-09-20_10-23-17
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 186
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.101758208539751
          entropy_coeff: 0.009999999999999998
          kl: 0.016744239920348472
          policy_loss: -0.05415340728229946
          total_loss: -0.06912884778446621
          vf_explained_var: 0.11857501417398453
          vf_loss: 0.0012739624824866446
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_since_restore: 186
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,186,2116.35,186000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-09-20_10-23-28
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 187
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8916118992699518
          entropy_coeff: 0.009999999999999998
          kl: 0.013226066999063925
          policy_loss: -0.011202357398966948
          total_loss: -0.025469023485978445
          vf_explained_var: -0.49372321367263794
          vf_loss: 0.000883122808429309
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_since_restore: 187
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,187,2126.9,187000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-09-20_10-23-38
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 188
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.981636639436086
          entropy_coeff: 0.009999999999999998
          kl: 0.011229366756772387
          policy_loss: -0.12737562925451332
          total_loss: -0.14186259520550568
          vf_explained_var: 0.1471191793680191
          vf_loss: 0.0021316638714375183
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterations_since_restore: 188
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,188,2137.38,188000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-09-20_10-23-49
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 189
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8746702684296501
          entropy_coeff: 0.009999999999999998
          kl: 0.015754116694603148
          policy_loss: 0.02845566483835379
          total_loss: 0.021627549692574473
          vf_explained_var: 0.3690478503704071
          vf_loss: 0.007432356941798288
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterations_since_restore: 189
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,189,2147.66,189000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-09-20_10-23-59
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 190
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0619775070084465
          entropy_coeff: 0.009999999999999998
          kl: 0.01733050530875424
          policy_loss: -0.09788672385944261
          total_loss: -0.11166430132256613
          vf_explained_var: 0.5145321488380432
          vf_loss: 0.0019070662021274782
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 190
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,190,2158,190000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-09-20_10-24-09
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 191
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1917386876212226
          entropy_coeff: 0.009999999999999998
          kl: 0.01145200700181594
          policy_loss: -0.053165502266751395
          total_loss: -0.06829812261793348
          vf_explained_var: 0.19415634870529175
          vf_loss: 0.0035236275082247124
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  iterations_since_restore: 191
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,191,2168.25,191000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-09-20_10-24-20
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 192
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8474455575148265
          entropy_coeff: 0.009999999999999998
          kl: 0.029169838974656415
          policy_loss: -0.003760940002070533
          total_loss: -0.011856946680280898
          vf_explained_var: 0.43290722370147705
          vf_loss: 0.0020718808948812593
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 192
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,192,2178.53,192000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-09-20_10-24-30
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 193
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.1737987094455296
          entropy_coeff: 0.009999999999999998
          kl: 0.012933760338568866
          policy_loss: -0.01346700530913141
          total_loss: -0.02865028033653895
          vf_explained_var: 0.04767125844955444
          vf_loss: 0.0010300761787220836
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  iterations_since_restore: 193
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,193,2189.13,193000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-09-20_10-24-41
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 194
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.212952052222358
          entropy_coeff: 0.009999999999999998
          kl: 0.013644264957834048
          policy_loss: -0.04270955744302935
          total_loss: -0.05779873981244034
          vf_explained_var: -0.2294166386127472
          vf_loss: 0.0012122047987456123
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterations_since_restore: 194
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,194,2199.72,194000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-09-20_10-24-51
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 195
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.1526113828023274
          entropy_coeff: 0.009999999999999998
          kl: 0.008999690065278069
          policy_loss: -0.0893091779616144
          total_loss: -0.10457011875179079
          vf_explained_var: -0.28475940227508545
          vf_loss: 0.0024209708602736806
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  iterations_since_restore: 195
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,195,2210.38,195000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-09-20_10-25-02
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 196
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.151994428369734
          entropy_coeff: 0.009999999999999998
          kl: 0.010296460411519664
          policy_loss: -0.03736620419141319
          total_loss: -0.05226833290523953
          vf_explained_var: -0.3574862480163574
          vf_loss: 0.002219694418211778
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 196
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,196,2220.66,196000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-09-20_10-25-12
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 197
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8697673757870992
          entropy_coeff: 0.009999999999999998
          kl: 0.013555787911057927
          policy_loss: -0.09732058151728577
          total_loss: -0.1087632001688083
          vf_explained_var: -0.19859664142131805
          vf_loss: 0.0014647211077519588
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  iterations_since_restore: 197
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,197,2231.09,197000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-09-20_10-25-23
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 198
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.1631674978468154
          entropy_coeff: 0.009999999999999998
          kl: 0.01110509823033467
          policy_loss: -0.062080597173836495
          total_loss: -0.07800847213301394
          vf_explained_var: -0.8792492151260376
          vf_loss: 0.0009602736992140611
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iterations_since_restore: 198
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,198,2241.54,198000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-09-20_10-25-33
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 199
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.005477754275004
          entropy_coeff: 0.009999999999999998
          kl: 0.01062353710526125
          policy_loss: -0.08522236821138196
          total_loss: -0.09995575623793734
          vf_explained_var: -0.6045636534690857
          vf_loss: 0.0007835591024356998
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  iterations_since_restore: 199
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,199,2252,199000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-09-20_10-25-44
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 200
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.1352517512109546
          entropy_coeff: 0.009999999999999998
          kl: 0.010627477107078034
          policy_loss: -0.03751291723714934
          total_loss: -0.05293100525935491
          vf_explained_var: -0.6487246155738831
          vf_loss: 0.0013949211777394845
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 200
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,200,2262.37,200000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-09-20_10-25-54
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 201
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.084183504846361
          entropy_coeff: 0.009999999999999998
          kl: 0.01836736350056215
          policy_loss: 0.007472958995236291
          total_loss: -0.004693039630850156
          vf_explained_var: -0.638526976108551
          vf_loss: 0.0008302443411796251
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  iterations_since_restore: 201
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,201,2272.57,201000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-09-20_10-26-04
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 202
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.076734533574846
          entropy_coeff: 0.009999999999999998
          kl: 0.00978535598736193
          policy_loss: -0.1305796946088473
          total_loss: -0.14624757965405782
          vf_explained_var: -0.3630428612232208
          vf_loss: 0.0009196595618656526
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iterations_since_restore: 202
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,202,2283.03,202000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-09-20_10-26-15
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 203
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.01738123761283
          entropy_coeff: 0.009999999999999998
          kl: 0.008446461758627658
          policy_loss: -0.0024021887944804298
          total_loss: -0.018378776932756104
          vf_explained_var: -0.3696616590023041
          vf_loss: 0.0005893305065304351
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iterations_since_restore: 203
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,203,2293.53,203000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-09-20_10-26-25
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 204
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.179107575946384
          entropy_coeff: 0.009999999999999998
          kl: 0.009610078810516537
          policy_loss: -0.06668769837253624
          total_loss: -0.0836644244276815
          vf_explained_var: -0.7242202758789062
          vf_loss: 0.0007094190627362372
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iterations_since_restore: 204
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,204,2303.97,204000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-09-20_10-26-36
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 205
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.003146349059211
          entropy_coeff: 0.009999999999999998
          kl: 0.010731218155645629
          policy_loss: -0.14476490670090747
          total_loss: -0.15907214518843427
          vf_explained_var: -0.4148727059364319
          vf_loss: 0.001140402109336315
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iterations_since_restore: 205
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,205,2314.6,205000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-09-20_10-26-46
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 206
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.0178769403033785
          entropy_coeff: 0.009999999999999998
          kl: 0.008710903742620532
          policy_loss: -0.11600301994217767
          total_loss: -0.13178989713390668
          vf_explained_var: -0.39402517676353455
          vf_loss: 0.0006710415389130099
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterations_since_restore: 206
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,206,2325.09,206000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-09-20_10-26-57
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 207
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.225390511088901
          entropy_coeff: 0.009999999999999998
          kl: 0.010045889261423982
          policy_loss: -0.07688166987564829
          total_loss: -0.09397464642922083
          vf_explained_var: -1.0
          vf_loss: 0.0008698421058296744
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  iterations_since_restore: 207
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,207,2335.79,207000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-09-20_10-27-08
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 208
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.6904793169763352
          entropy_coeff: 0.009999999999999998
          kl: 0.010818759855835304
          policy_loss: 0.0052369440595308936
          total_loss: -0.0062653564330604344
          vf_explained_var: 0.27492862939834595
          vf_loss: 0.0007812758309430339
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 208
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,208,2346.25,208000,-0.03,0,-2,995.9


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-09-20_10-27-18
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 209
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9012049873669943
          entropy_coeff: 0.009999999999999998
          kl: 0.005963817996022098
          policy_loss: -0.02372974654038747
          total_loss: -0.03964514997270372
          vf_explained_var: -0.4234120547771454
          vf_loss: 0.0005492095849250391
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  iterations_since_restore: 209
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,209,2356.63,209000,-0.03,0,-2,995.9




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-09-20_10-27-47
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 210
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.7845515780978733
          entropy_coeff: 0.009999999999999998
          kl: 0.011753839997658035
          policy_loss: -0.14665217914928994
          total_loss: -0.1580428326709403
          vf_explained_var: 0.14485818147659302
          vf_loss: 0.0014342266768734488
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore: 210
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,210,2385.51,210000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-09-20_10-27-58
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 211
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.200297274854448
          entropy_coeff: 0.009999999999999998
          kl: 0.012361151771861185
          policy_loss: -0.0035252097580167983
          total_loss: -0.018919511801666684
          vf_explained_var: -0.0630386620759964
          vf_loss: 0.0013286243422448427
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_since_restore: 211
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,211,2396.34,211000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-09-20_10-28-09
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 212
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8359550356864929
          entropy_coeff: 0.009999999999999998
          kl: 0.014767760494288674
          policy_loss: 0.01960198043121232
          total_loss: 0.008747032491697206
          vf_explained_var: -0.13606545329093933
          vf_loss: 0.00119657795213344
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  iterations_since_restore: 212
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,212,2407.05,212000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-09-20_10-28-20
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 213
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.080828314357334
          entropy_coeff: 0.009999999999999998
          kl: 0.01121583057188733
          policy_loss: -0.08572609027226766
          total_loss: -0.10087246480915281
          vf_explained_var: -0.4099237322807312
          vf_loss: 0.0008710831853224793
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  iterations_since_restore: 213
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,213,2418.03,213000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-09-20_10-28-31
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 214
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.1732866684595744
          entropy_coeff: 0.009999999999999998
          kl: 0.013595205066651772
          policy_loss: -0.03470884184870455
          total_loss: -0.04972539378537072
          vf_explained_var: -0.05317068099975586
          vf_loss: 0.000909145670529041
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  iterations_since_restore: 214
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,214,2429.21,214000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-09-20_10-28-42
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 215
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9623902996381124
          entropy_coeff: 0.009999999999999998
          kl: 0.013533033877618155
          policy_loss: -0.07593944379025036
          total_loss: -0.08891164975033866
          vf_explained_var: 0.14123137295246124
          vf_loss: 0.0008710784662980586
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iterations_since_restore: 215
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,215,2440.36,215000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-09-20_10-28-53
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 216
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.7247387568155925
          entropy_coeff: 0.009999999999999998
          kl: 0.006493534568571846
          policy_loss: -0.018123218748304578
          total_loss: -0.03164293724629614
          vf_explained_var: 0.45735085010528564
          vf_loss: 0.0009539677797066462
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterations_since_restore: 216
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,216,2451.21,216000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-09-20_10-29-04
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 217
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.1572826888826158
          entropy_coeff: 0.009999999999999998
          kl: 0.009798850230153672
          policy_loss: -0.12099188081920147
          total_loss: -0.13742713845438428
          vf_explained_var: 0.09079993516206741
          vf_loss: 0.0009520039955128191
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iterations_since_restore: 217
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,217,2462.33,217000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-09-20_10-29-15
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 218
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9641286108228895
          entropy_coeff: 0.009999999999999998
          kl: 0.008039645818122507
          policy_loss: -0.08342239434520403
          total_loss: -0.0986441146582365
          vf_explained_var: -0.5111039280891418
          vf_loss: 0.0009854405138563986
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  iterations_since_restore: 218
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,218,2473.34,218000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-09-20_10-29-26
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 219
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9442739619149103
          entropy_coeff: 0.009999999999999998
          kl: 0.011986064118752892
          policy_loss: -0.08199113913708263
          total_loss: -0.09482615039580398
          vf_explained_var: -0.8493596315383911
          vf_loss: 0.0014879002258466143
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iterations_since_restore: 219
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,219,2484.22,219000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-09-20_10-29-37
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 220
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.0671769195132788
          entropy_coeff: 0.009999999999999998
          kl: 0.008801913898683723
          policy_loss: -0.10067625633544391
          total_loss: -0.11550435775683986
          vf_explained_var: -0.01367012970149517
          vf_loss: 0.0020839431840512486
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 220
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,220,2495.38,220000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-09-20_10-29-48
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 221
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.103529001606835
          entropy_coeff: 0.009999999999999998
          kl: 0.013431301445596658
          policy_loss: 0.03876418024301529
          total_loss: 0.02427817036708196
          vf_explained_var: -0.49419620633125305
          vf_loss: 0.0008121192261266212
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  iterations_since_restore: 221
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,221,2505.94,221000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-09-20_10-29-59
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 222
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.0343871739175583
          entropy_coeff: 0.009999999999999998
          kl: 0.008394809898151716
          policy_loss: -0.048820875212550165
          total_loss: -0.06441418387823634
          vf_explained_var: -0.5263194441795349
          vf_loss: 0.0011647316494620302
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iterations_since_restore: 222
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,222,2516.88,222000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-09-20_10-30-10
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 223
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9952839824888442
          entropy_coeff: 0.009999999999999998
          kl: 0.0029239021920521263
          policy_loss: -0.24832698752482732
          total_loss: -0.26622952073812484
          vf_explained_var: -0.23727107048034668
          vf_loss: 0.0008013636510845067
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  iterations_since_restore: 223
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,223,2528.09,223000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-09-20_10-30-20
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 224
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.0331083244747585
          entropy_coeff: 0.009999999999999998
          kl: 0.014569452918301156
          policy_loss: -0.08174968130058713
          total_loss: -0.0978364442785581
          vf_explained_var: -0.442316472530365
          vf_loss: 0.0011326560116786924
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 224
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,224,2538.5,224000,-0.05,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-09-20_10-30-31
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 225
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 1.9779040588272943
          entropy_coeff: 0.009999999999999998
          kl: 0.01583567294886437
          policy_loss: -0.04102839256326358
          total_loss: -0.05645749494433403
          vf_explained_var: 0.10102741420269012
          vf_loss: 0.0009678477753671662
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterations_since_restore: 225
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,225,2549.2,225000,-0.05,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-09-20_10-30-42
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 226
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 1.6760393963919746
          entropy_coeff: 0.009999999999999998
          kl: 0.015026235151964645
          policy_loss: -0.05268843670686086
          total_loss: -0.06502195878161324
          vf_explained_var: 0.15493744611740112
          vf_loss: 0.0012176591521387713
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations_since_restore: 226
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,226,2560.08,226000,-0.05,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-09-20_10-30-53
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 227
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.1000457723935444
          entropy_coeff: 0.009999999999999998
          kl: 0.015662187876605276
          policy_loss: 0.05962922614481714
          total_loss: 0.042986496910452844
          vf_explained_var: -0.24586337804794312
          vf_loss: 0.0010126863582020937
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterations_since_restore: 227
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,227,2571.46,227000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-09-20_10-31-04
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 228
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 1.989478161599901
          entropy_coeff: 0.009999999999999998
          kl: 0.011941587308068335
          policy_loss: -0.05515762919353114
          total_loss: -0.07187500186264514
          vf_explained_var: -0.35963940620422363
          vf_loss: 0.0006269978004436578
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iterations_since_restore: 228
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,228,2582.42,228000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-09-20_10-31-15
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 229
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.0394187516636317
          entropy_coeff: 0.009999999999999998
          kl: 0.01726954104064485
          policy_loss: 0.013302224708928003
          total_loss: -0.002706345087952084
          vf_explained_var: 0.2860275208950043
          vf_loss: 0.0006972909258264634
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations_since_restore: 229
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,229,2593.47,229000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-09-20_10-31-27
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 230
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.203001884619395
          entropy_coeff: 0.009999999999999998
          kl: 0.01700193686337711
          policy_loss: 0.09161169818705983
          total_loss: 0.07383475229144096
          vf_explained_var: -0.18102052807807922
          vf_loss: 0.0006218988259206526
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations_since_restore: 230
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,230,2604.78,230000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-09-20_10-31-38
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 231
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.094878616597917
          entropy_coeff: 0.009999999999999998
          kl: 0.012685826001738182
          policy_loss: -0.03332464227245913
          total_loss: -0.0507059791435798
          vf_explained_var: -0.021936018019914627
          vf_loss: 0.0008580843544526336
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iterations_since_restore: 231
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,231,2615.87,231000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-09-20_10-31-49
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 232
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.291078800625271
          entropy_coeff: 0.009999999999999998
          kl: 0.01139309616227116
          policy_loss: -0.16300978163878124
          total_loss: -0.18216356510917345
          vf_explained_var: -0.165721595287323
          vf_loss: 0.0013237320203592794
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations_since_restore: 232
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,232,2627.33,232000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-09-20_10-32-00
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 233
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.084522189034356
          entropy_coeff: 0.009999999999999998
          kl: 0.023799500583323367
          policy_loss: 0.007264517371853193
          total_loss: -0.00762416852845086
          vf_explained_var: 0.24631324410438538
          vf_loss: 0.0008735740935662762
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  iterations_since_restore: 233
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,233,2638.41,233000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-09-20_10-32-11
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 234
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.239832940366533
          entropy_coeff: 0.009999999999999998
          kl: 0.013564574021321442
          policy_loss: -0.11444414291116926
          total_loss: -0.1317592671347989
          vf_explained_var: -0.23201559484004974
          vf_loss: 0.0007376381440230438
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  iterations_since_restore: 234
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,234,2649.3,234000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-09-20_10-32-23
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 235
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.8694673299789428
          entropy_coeff: 0.009999999999999998
          kl: 0.011846861162226219
          policy_loss: -0.035289325813452406
          total_loss: -0.0495584901008341
          vf_explained_var: -0.7645248174667358
          vf_loss: 0.000630232955801249
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  iterations_since_restore: 235
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,235,2660.52,235000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-09-20_10-32-34
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 236
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.8304040683640375
          entropy_coeff: 0.009999999999999998
          kl: 0.01111210360881683
          policy_loss: -0.051946645064486395
          total_loss: -0.06603096458646986
          vf_explained_var: -0.60161292552948
          vf_loss: 0.0006598312545166764
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  iterations_since_restore: 236
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,236,2671.85,236000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-09-20_10-32-45
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 237
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.1534587224324544
          entropy_coeff: 0.009999999999999998
          kl: 0.009583470513520827
          policy_loss: -0.06121024481124348
          total_loss: -0.07852937893735037
          vf_explained_var: -0.9613648056983948
          vf_loss: 0.00114527983047689
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  iterations_since_restore: 237
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,237,2683.3,237000,-0.03,0,-2,995.91


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-09-20_10-32-56
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 238
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.2678276459376017
          entropy_coeff: 0.009999999999999998
          kl: 0.020032891013647636
          policy_loss: 0.02902478819919957
          total_loss: 0.013457170749704043
          vf_explained_var: -0.3135180175304413
          vf_loss: 0.0006928953422983695
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
  iterations_since_restore: 238
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,238,2694.24,238000,-0.03,0,-2,995.91




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-09-20_10-33-25
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 2
  episodes_total: 240
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0678291903601753
          entropy_coeff: 0.009999999999999998
          kl: 0.007335117720759854
          policy_loss: -0.08550058282497856
          total_loss: -0.10182305263976256
          vf_explained_var: -0.1489935964345932
          vf_loss: 0.0008309878622336935
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  iterations_since_restore: 239
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,239,2722.75,239000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-09-20_10-33-37
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 241
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0052854710155064
          entropy_coeff: 0.009999999999999998
          kl: 0.013926205437344623
          policy_loss: -0.05369417816400528
          total_loss: -0.06601131235559782
          vf_explained_var: -0.5588250756263733
          vf_loss: 0.0010435957125284605
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 240
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,240,2734.78,240000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-09-20_10-33-48
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 242
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.152091083261702
          entropy_coeff: 0.009999999999999998
          kl: 0.015215138742067246
          policy_loss: -0.12167526288992829
          total_loss: -0.1336351732412974
          vf_explained_var: -0.0010658648097887635
          vf_loss: 0.002249489986570552
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  iterations_since_restore: 241
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,241,2745.52,241000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-09-20_10-33-59
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 243
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9612744278377956
          entropy_coeff: 0.009999999999999998
          kl: 0.009090884667483708
          policy_loss: 0.04763343404564593
          total_loss: 0.03325225448028909
          vf_explained_var: -0.427175909280777
          vf_loss: 0.0008630126496427693
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iterations_since_restore: 242
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,242,2756.26,242000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-09-20_10-34-09
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 244
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9107349236806235
          entropy_coeff: 0.009999999999999998
          kl: 0.008465707145276684
          policy_loss: -0.0971608932233519
          total_loss: -0.11181672389308611
          vf_explained_var: -0.8093177676200867
          vf_loss: 0.00038339027103372953
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  iterations_since_restore: 243
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,243,2767.02,243000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-09-20_10-34-20
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 245
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9299603303273518
          entropy_coeff: 0.009999999999999998
          kl: 0.009168946482434394
          policy_loss: 0.036803304072883396
          total_loss: 0.022530085469285647
          vf_explained_var: -0.45537054538726807
          vf_loss: 0.0006203212331860817
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  iterations_since_restore: 244
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,244,2778.08,244000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-09-20_10-34-31
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 246
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.747136127948761
          entropy_coeff: 0.009999999999999998
          kl: 0.00817275686181631
          policy_loss: -0.022168746590614317
          total_loss: -0.0351184391313129
          vf_explained_var: -0.6329759955406189
          vf_loss: 0.0005943162816846679
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
  iterations_since_restore: 245
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,245,2789.15,245000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-09-20_10-34-42
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 247
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.322450375556946
          entropy_coeff: 0.009999999999999998
          kl: 0.013041474896298134
          policy_loss: -0.10540606809986962
          total_loss: -0.120964798082908
          vf_explained_var: -0.2980819642543793
          vf_loss: 0.0013987996518456689
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  iterations_since_restore: 246
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,246,2800.1,246000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-09-20_10-34-54
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 248
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.715134506755405
          entropy_coeff: 0.009999999999999998
          kl: 0.009405714370566809
          policy_loss: -0.03473772257566452
          total_loss: -0.047009866643283105
          vf_explained_var: -0.48903125524520874
          vf_loss: 0.0003593579696219725
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
  iterations_since_restore: 247
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,247,2811.2,247000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-09-20_10-35-04
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 249
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5676274471812779
          entropy_coeff: 0.009999999999999998
          kl: 0.0064337199183215985
          policy_loss: -0.0653656404879358
          total_loss: -0.07760446336534288
          vf_explained_var: -0.15447808802127838
          vf_loss: 0.0003457798004092183
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  iterations_since_restore: 248
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,248,2821.77,248000,-0.03,0,-2,994.53


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-09-20_10-35-13
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 250
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3024398379855686
          entropy_coeff: 0.009999999999999998
          kl: 0.01310685202527032
          policy_loss: 0.06274925726983283
          total_loss: 0.046249191380209394
          vf_explained_var: 0.26475757360458374
          vf_loss: 0.00022594057131249834
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  iterations_since_restore: 249
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,249,2830.86,249000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-09-20_10-35-23
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 251
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2677425781885785
          entropy_coeff: 0.009999999999999998
          kl: 0.005164207689687099
          policy_loss: -0.07378325673441093
          total_loss: -0.09359562293522888
          vf_explained_var: 0.4930548369884491
          vf_loss: 0.0003834397650886482
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations_since_restore: 250
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,250,2840.53,250000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-09-20_10-35-34
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 252
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.941793613963657
          entropy_coeff: 0.009999999999999998
          kl: 0.01145341688472096
          policy_loss: 0.0940865262515015
          total_loss: 0.0808345483822955
          vf_explained_var: -0.3506098687648773
          vf_loss: 0.0006621125063651966
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
  iterations_since_restore: 251
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,251,2851.59,251000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-09-20_10-35-45
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 253
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8156281166606478
          entropy_coeff: 0.009999999999999998
          kl: 0.009860944408083267
          policy_loss: 0.00043195860667361155
          total_loss: -0.01237530294391844
          vf_explained_var: -0.4735310971736908
          vf_loss: 0.0006104206791658523
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  iterations_since_restore: 252
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,252,2862.76,252000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-09-20_10-35-56
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 254
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5900250845485264
          entropy_coeff: 0.009999999999999998
          kl: 0.010443023142859697
          policy_loss: -0.035134351998567584
          total_loss: -0.04501266703009606
          vf_explained_var: -0.7638990879058838
          vf_loss: 0.0010036264614932911
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
  iterations_since_restore: 253
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,253,2873.75,253000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-09-20_10-36-06
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 255
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1860221174028185
          entropy_coeff: 0.009999999999999998
          kl: 0.008813471361005654
          policy_loss: -0.089472117771705
          total_loss: -0.10603078802426656
          vf_explained_var: 0.3849965035915375
          vf_loss: 0.0010663083347026258
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
  iterations_since_restore: 254
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,254,2883.19,254000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-09-20_10-36-16
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 256
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9672450105349222
          entropy_coeff: 0.009999999999999998
          kl: 0.013629460325730097
          policy_loss: 0.06029729230536355
          total_loss: 0.048420866909954285
          vf_explained_var: -0.28201034665107727
          vf_loss: 0.001246496593780143
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  iterations_since_restore: 255
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,255,2893.42,255000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-09-20_10-36-27
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 257
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9591962615648906
          entropy_coeff: 0.009999999999999998
          kl: 0.014667109054145379
          policy_loss: 0.06424814114967982
          total_loss: 0.0526653539803293
          vf_explained_var: -0.5437403917312622
          vf_loss: 0.0009610163921024651
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  iterations_since_restore: 256
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,256,2904.45,256000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-09-20_10-36-38
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 258
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9724995493888855
          entropy_coeff: 0.009999999999999998
          kl: 0.01260709494645277
          policy_loss: -0.0171350184828043
          total_loss: -0.028841014144321282
          vf_explained_var: 0.07406521588563919
          vf_loss: 0.0019607587885628972
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
  iterations_since_restore: 257
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,257,2915.22,257000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-09-20_10-36-49
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 259
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6878492487801446
          entropy_coeff: 0.009999999999999998
          kl: 0.009240116120767174
          policy_loss: -0.01597745360599624
          total_loss: -0.027486726144949594
          vf_explained_var: -0.3351427912712097
          vf_loss: 0.000928956524714724
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000
  iterations_since_restore: 258
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,258,2926.18,258000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-09-20_10-36-59
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 260
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7294599374135335
          entropy_coeff: 0.009999999999999998
          kl: 0.009070506400679178
          policy_loss: -0.033440888424714404
          total_loss: -0.045983101261986625
          vf_explained_var: -0.4395371675491333
          vf_loss: 0.00039362737491804487
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000
  iterations_since_restore: 259
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,259,2936.64,259000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-09-20_10-37-10
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 261
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2771596352259318
          entropy_coeff: 0.009999999999999998
          kl: 0.005030382421367976
          policy_loss: -0.029030212718579505
          total_loss: -0.039194607569111715
          vf_explained_var: -0.9830976724624634
          vf_loss: 0.00018989296820816687
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 260
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,260,2947.37,260000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-09-20_10-37-19
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 262
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2534664975272283
          entropy_coeff: 0.009999999999999998
          kl: 0.008269674067908402
          policy_loss: -0.008809134032991198
          total_loss: -0.027284898857275645
          vf_explained_var: -0.6381700038909912
          vf_loss: 8.497909596674921e-05
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000
  iterations_since_restore: 261
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,261,2956.29,261000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-09-20_10-37-28
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 263
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2404077582889133
          entropy_coeff: 0.009999999999999998
          kl: 0.00652545211563162
          policy_loss: -0.03540974744699067
          total_loss: -0.054613781202998424
          vf_explained_var: -0.5384316444396973
          vf_loss: 6.429059261184497e-05
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000
  iterations_since_restore: 262
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,262,2965.17,262000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-09-20_10-37-37
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 264
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3062881787618
          entropy_coeff: 0.009999999999999998
          kl: 0.0032422940085756104
          policy_loss: 0.18809778425428603
          total_loss: 0.16664911011854808
          vf_explained_var: -0.24816852807998657
          vf_loss: 5.614735766559736e-05
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000
  iterations_since_restore: 263
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,263,2974.29,263000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-09-20_10-37-48
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 265
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.910600451628367
          entropy_coeff: 0.009999999999999998
          kl: 0.01868825552680138
          policy_loss: 0.10871809331907166
          total_loss: 0.09466825301448505
          vf_explained_var: -0.9307423233985901
          vf_loss: 0.0005659197621247989
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  iterations_since_restore: 264
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,264,2985.56,264000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-09-20_10-38-00
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 266
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 2.0312184744411046
          entropy_coeff: 0.009999999999999998
          kl: 0.016162834064480137
          policy_loss: 0.11201443436245123
          total_loss: 0.09757983821133773
          vf_explained_var: -0.6450928449630737
          vf_loss: 0.0019941291564868555
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
  iterations_since_restore: 265
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,265,2997.07,265000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-09-20_10-38-11
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 267
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.9743323630756802
          entropy_coeff: 0.009999999999999998
          kl: 0.012367712306450665
          policy_loss: 0.05395416520122025
          total_loss: 0.037796618018506305
          vf_explained_var: -0.9600528478622437
          vf_loss: 0.0006141716345963586
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
  iterations_since_restore: 266
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,266,3007.75,266000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-09-20_10-38-21
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 268
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.8282794144418504
          entropy_coeff: 0.009999999999999998
          kl: 0.021574264622784525
          policy_loss: 0.03264879571894805
          total_loss: 0.02095914036035538
          vf_explained_var: -0.6433346271514893
          vf_loss: 0.0014094692873994872
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000
  iterations_since_restore: 267
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,267,3018.61,267000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-09-20_10-38-32
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 269
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.7881939040289985
          entropy_coeff: 0.009999999999999998
          kl: 0.01053387563817198
          policy_loss: -0.0906694204443031
          total_loss: -0.10400934211081929
          vf_explained_var: -0.646027147769928
          vf_loss: 0.0007455378583270228
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  iterations_since_restore: 268
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,268,3029.58,268000,-0.03,0,-2,995.88




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-09-20_10-39-01
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 270
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.7593079937828913
          entropy_coeff: 0.009999999999999998
          kl: 0.010666970169841179
          policy_loss: -0.009146047931992345
          total_loss: -0.02253439367438356
          vf_explained_var: -0.707702100276947
          vf_loss: 0.0003602871862919225
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000
  iterations_since_restore: 269
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,269,3057.71,269000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-09-20_10-39-13
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 271
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.6960990322960747
          entropy_coeff: 0.009999999999999998
          kl: 0.007958505293995069
          policy_loss: -0.015805207813779514
          total_loss: -0.029336113565497927
          vf_explained_var: 0.12186311185359955
          vf_loss: 0.0005617883512362217
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  iterations_since_restore: 270
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,270,3069.87,270000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-09-20_10-39-23
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 272
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 2.102936953968472
          entropy_coeff: 0.009999999999999998
          kl: 0.015551085638962698
          policy_loss: 0.0017184935406678254
          total_loss: -0.013132775430050161
          vf_explained_var: -0.5887979865074158
          vf_loss: 0.0005733861683337536
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  iterations_since_restore: 271
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,271,3080.31,271000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-09-20_10-39-34
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 273
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.687683375676473
          entropy_coeff: 0.009999999999999998
          kl: 0.016004676253116162
          policy_loss: 0.0017955397255718709
          total_loss: -0.008596361490587394
          vf_explained_var: -0.9943503737449646
          vf_loss: 0.0007167430862965476
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  iterations_since_restore: 272
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,272,3090.85,272000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-09-20_10-39-44
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 274
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.943103505505456
          entropy_coeff: 0.009999999999999998
          kl: 0.019855326589920164
          policy_loss: -0.00888281421115001
          total_loss: -0.018906293768021797
          vf_explained_var: 0.40518859028816223
          vf_loss: 0.002251566650940933
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
  iterations_since_restore: 273
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,273,3101.31,273000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-09-20_10-39-55
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 275
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.7917432175742256
          entropy_coeff: 0.009999999999999998
          kl: 0.012362540655259657
          policy_loss: 0.1505562533934911
          total_loss: 0.13811403032806185
          vf_explained_var: -0.9964312314987183
          vf_loss: 0.0010196681147337787
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  iterations_since_restore: 274
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,274,3112.15,274000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-09-20_10-40-07
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 276
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 2.1637374771965874
          entropy_coeff: 0.009999999999999998
          kl: 0.010752577388766503
          policy_loss: -0.08254966255691316
          total_loss: -0.09904369331068463
          vf_explained_var: -0.5814963579177856
          vf_loss: 0.0012680446635284978
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
  iterations_since_restore: 275
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,275,3124.07,275000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-09-20_10-40-18
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 277
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.8137380825148688
          entropy_coeff: 0.009999999999999998
          kl: 0.012368560181209846
          policy_loss: -0.033246332448389794
          total_loss: -0.04589683877097236
          vf_explained_var: -0.543349027633667
          vf_loss: 0.0010291644847408557
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
  iterations_since_restore: 276
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,276,3135.38,276000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-09-20_10-40-30
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 278
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.9410778919855753
          entropy_coeff: 0.009999999999999998
          kl: 0.011715957045053344
          policy_loss: -0.023086720787816577
          total_loss: -0.03698825273248885
          vf_explained_var: 0.2708693742752075
          vf_loss: 0.0012867406492457828
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
  iterations_since_restore: 277
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,277,3146.69,277000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-09-20_10-40-41
  done: false
  episode_len_mean: 994.51
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 279
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.931872186395857
          entropy_coeff: 0.009999999999999998
          kl: 0.016709157067631906
          policy_loss: -0.13065430654419793
          total_loss: -0.14244303703308106
          vf_explained_var: -0.06429778784513474
          vf_loss: 0.0015079031905366315
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000
  iterations_since_restore: 278
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,278,3157.74,278000,-0.03,0,-2,994.51


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-09-20_10-40-52
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 280
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.9238452447785273
          entropy_coeff: 0.009999999999999998
          kl: 0.026848175502043287
          policy_loss: -0.0024533579746882123
          total_loss: -0.011444722198777728
          vf_explained_var: -0.2887161672115326
          vf_loss: 0.0005708344817523741
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
  iterations_since_restore: 279
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,279,3168.85,279000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-09-20_10-41-03
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 281
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.000459575653076
          entropy_coeff: 0.009999999999999998
          kl: 0.012352052470018925
          policy_loss: -0.05697365612205532
          total_loss: -0.06937831091798013
          vf_explained_var: 0.10399818420410156
          vf_loss: 0.0009223019270898982
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 280
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,280,3179.75,280000,-0.03,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-09-20_10-41-14
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 282
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.95896495713128
          entropy_coeff: 0.009999999999999998
          kl: 0.014503246714543196
          policy_loss: 0.07665938784678777
          total_loss: 0.06604227465060022
          vf_explained_var: -0.07551947981119156
          vf_loss: 0.001131942910918345
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
  iterations_since_restore: 281
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,281,3190.93,281000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-09-20_10-41-25
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 283
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.0114745047357347
          entropy_coeff: 0.009999999999999998
          kl: 0.008322783834476431
          policy_loss: 0.06859834906127718
          total_loss: 0.053460049877564114
          vf_explained_var: -0.9302147626876831
          vf_loss: 0.0004770673577796616
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000
  iterations_since_restore: 282
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,282,3201.99,282000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-09-20_10-41-36
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 284
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.030436250898573
          entropy_coeff: 0.009999999999999998
          kl: 0.01379034395309095
          policy_loss: -0.04839292052719328
          total_loss: -0.059482491761446
          vf_explained_var: -0.004065999761223793
          vf_loss: 0.001759597561451503
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  iterations_since_restore: 283
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,283,3213,283000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-09-20_10-41-47
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 285
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8681281632847255
          entropy_coeff: 0.009999999999999998
          kl: 0.014004274622271565
          policy_loss: -0.05884310234751966
          total_loss: -0.0687549441949361
          vf_explained_var: -0.6885591149330139
          vf_loss: 0.001198595870583732
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  iterations_since_restore: 284
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,284,3224.24,284000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-09-20_10-41-58
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 286
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.0707671258184646
          entropy_coeff: 0.009999999999999998
          kl: 0.009221240770888626
          policy_loss: -0.01973360785179668
          total_loss: -0.03487013073431121
          vf_explained_var: -0.6526657342910767
          vf_loss: 0.0005860555879836385
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  iterations_since_restore: 285
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,285,3235.05,285000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-09-20_10-42-09
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 287
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.6327748417854309
          entropy_coeff: 0.009999999999999998
          kl: 0.005424359522020773
          policy_loss: -0.023253271645969815
          total_loss: -0.036274083620972106
          vf_explained_var: -0.10821043699979782
          vf_loss: 0.0003744732758301931
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  iterations_since_restore: 286
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,286,3246.2,286000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-09-20_10-42-21
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 288
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8399222572644551
          entropy_coeff: 0.009999999999999998
          kl: 0.010670195564680659
          policy_loss: -0.014838238350219198
          total_loss: -0.02707422429488765
          vf_explained_var: 0.4296565055847168
          vf_loss: 0.00039482516650524407
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000
  iterations_since_restore: 287
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,287,3257.35,287000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-09-20_10-42-32
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 289
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.3350334458880955
          entropy_coeff: 0.009999999999999998
          kl: 0.005715337919018282
          policy_loss: 0.09307926446199417
          total_loss: 0.08310847340358628
          vf_explained_var: 0.6060327291488647
          vf_loss: 0.0002897754773989113
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000
  iterations_since_restore: 288
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,288,3268.68,288000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-09-20_10-42-43
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 290
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.0185413201649984
          entropy_coeff: 0.009999999999999998
          kl: 0.00860862594814904
          policy_loss: 0.014915831055906085
          total_loss: 0.00013215492169062296
          vf_explained_var: -0.36745303869247437
          vf_loss: 0.0007478292193910521
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000
  iterations_since_restore: 289
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,289,3279.78,289000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-09-20_10-42-54
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 291
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8916841559939914
          entropy_coeff: 0.009999999999999998
          kl: 0.008246463667407462
          policy_loss: 0.055681240641408496
          total_loss: 0.04182299772898356
          vf_explained_var: -0.9724465012550354
          vf_loss: 0.0006004810553147561
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  iterations_since_restore: 290
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,290,3290.67,290000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-09-20_10-43-05
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 292
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.918113558822208
          entropy_coeff: 0.009999999999999998
          kl: 0.0067884976525803215
          policy_loss: 0.05839735390618443
          total_loss: 0.04312457558181551
          vf_explained_var: -0.6783560514450073
          vf_loss: 0.00023843018263707764
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
  iterations_since_restore: 291
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,291,3302,291000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-09-20_10-43-17
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 293
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.6924762010574341
          entropy_coeff: 0.009999999999999998
          kl: 0.008989297762146364
          policy_loss: 0.12049550174011124
          total_loss: 0.10923779908981589
          vf_explained_var: -0.20721906423568726
          vf_loss: 0.0008073595360555272
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  iterations_since_restore: 292
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,292,3313.54,292000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-09-20_10-43-28
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 294
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.0746490054660374
          entropy_coeff: 0.009999999999999998
          kl: 0.008367594915510108
          policy_loss: 0.10696957976453834
          total_loss: 0.0912112014575137
          vf_explained_var: -0.6515222191810608
          vf_loss: 0.0004645088642266475
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  iterations_since_restore: 293
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,293,3324.85,293000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-09-20_10-43-40
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 295
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8460317730903626
          entropy_coeff: 0.009999999999999998
          kl: 0.011366775896237573
          policy_loss: -0.019708476670914226
          total_loss: -0.030858026320735612
          vf_explained_var: -0.16267959773540497
          vf_loss: 0.0011657780466420161
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000
  iterations_since_restore: 294
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,294,3336.19,294000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-09-20_10-43-51
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 296
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9975844846831428
          entropy_coeff: 0.009999999999999998
          kl: 0.0108961609972479
          policy_loss: -0.04829165766843491
          total_loss: -0.061585019218424956
          vf_explained_var: -0.6204128861427307
          vf_loss: 0.0007919146483699377
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000
  iterations_since_restore: 295
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,295,3347.54,295000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-09-20_10-44-02
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 297
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.0425413383377924
          entropy_coeff: 0.009999999999999998
          kl: 0.00923652380751538
          policy_loss: 0.08185073932011923
          total_loss: 0.06718219075765874
          vf_explained_var: -0.2039126306772232
          vf_loss: 0.0007635102246745697
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  iterations_since_restore: 296
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,296,3358.81,296000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-09-20_10-44-15
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 298
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.061137431197696
          entropy_coeff: 0.009999999999999998
          kl: 0.008380897770320573
          policy_loss: 0.15413998688260713
          total_loss: 0.13912271966950762
          vf_explained_var: -0.09341638535261154
          vf_loss: 0.0010633126535038982
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000
  iterations_since_restore: 297
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,297,3371.11,297000,-0.02,0,-2,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-09-20_10-44-27
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 299
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.809884656800164
          entropy_coeff: 0.009999999999999998
          kl: 0.008603183795868259
          policy_loss: 0.05093645641269783
          total_loss: 0.038096140920081074
          vf_explained_var: -0.12435705214738846
          vf_loss: 0.0006075661774957552
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000
  iterations_since_restore: 298
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,298,3383.66,298000,-0.02,0,-2,995.88




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-09-20_10-44-56
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 300
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7905503378974066
          entropy_coeff: 0.009999999999999998
          kl: 0.016419507151315346
          policy_loss: -0.02847458449088865
          total_loss: 0.006327413291566901
          vf_explained_var: 0.041837725788354874
          vf_loss: 0.04383095637507116
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
  iterations_since_restore: 299
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,299,3412.23,299000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-09-20_10-45-08
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 301
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9949704647064208
          entropy_coeff: 0.009999999999999998
          kl: 0.01721405544934665
          policy_loss: -0.010198116054137547
          total_loss: -0.017961664580636555
          vf_explained_var: 0.4735671877861023
          vf_loss: 0.0028800738548549515
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 300
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,300,3424.06,300000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-09-20_10-45-18
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 302
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9438603851530287
          entropy_coeff: 0.009999999999999998
          kl: 0.011981223401381887
          policy_loss: 0.003503947994775242
          total_loss: -0.007131182278196017
          vf_explained_var: 0.05139264464378357
          vf_loss: 0.002326307279549332
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000
  iterations_since_restore: 301
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,301,3433.89,301000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-09-20_10-45-27
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 303
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.0039694984753926
          entropy_coeff: 0.009999999999999998
          kl: 0.016155580369932258
          policy_loss: -0.005358210412992372
          total_loss: -0.014967958629131316
          vf_explained_var: 0.5456902980804443
          vf_loss: 0.0016960825383042295
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  iterations_since_restore: 302
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,302,3443.62,302000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-09-20_10-45-37
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 304
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8755589803059896
          entropy_coeff: 0.009999999999999998
          kl: 0.0073523532711009115
          policy_loss: -0.022939568902883265
          total_loss: -0.03670684998441073
          vf_explained_var: -0.7802573442459106
          vf_loss: 0.001013555348941332
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  iterations_since_restore: 303
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,303,3453.19,303000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-09-20_10-45-47
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 305
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.6953318211767407
          entropy_coeff: 0.009999999999999998
          kl: 0.01205053069773765
          policy_loss: -0.09445230346173047
          total_loss: -0.10303681651130318
          vf_explained_var: 0.41491517424583435
          vf_loss: 0.0018541720741066254
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  iterations_since_restore: 304
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,304,3463.06,304000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-09-20_10-45-57
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 306
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.947467189364963
          entropy_coeff: 0.009999999999999998
          kl: 0.009712935914261086
          policy_loss: -0.1221901125792
          total_loss: -0.13482701459692584
          vf_explained_var: 0.6289328932762146
          vf_loss: 0.0015868588273103038
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
  iterations_since_restore: 305
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,305,3473.66,305000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-09-20_10-46-08
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 307
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8805075526237487
          entropy_coeff: 0.009999999999999998
          kl: 0.009811327252743791
          policy_loss: -0.11035167516933547
          total_loss: -0.12172435273726781
          vf_explained_var: 0.6724485158920288
          vf_loss: 0.0021282950894803638
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  iterations_since_restore: 306
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,306,3484.07,306000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-09-20_10-46-18
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 308
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8887527015474108
          entropy_coeff: 0.009999999999999998
          kl: 0.012732322619914981
          policy_loss: -0.07551529332995414
          total_loss: -0.08592779090007147
          vf_explained_var: -0.35899651050567627
          vf_loss: 0.0015918112230590648
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  iterations_since_restore: 307
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,307,3494.48,307000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-09-20_10-46-28
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 309
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8873982654677497
          entropy_coeff: 0.009999999999999998
          kl: 0.015962125144868743
          policy_loss: -0.08213117898752292
          total_loss: -0.09085779384606414
          vf_explained_var: -0.6224194169044495
          vf_loss: 0.0015180851122648972
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
  iterations_since_restore: 308
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,308,3504.63,308000,-0.02,0,-2,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-09-20_10-46-39
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 310
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7718158562978108
          entropy_coeff: 0.009999999999999998
          kl: 0.012125223901225328
          policy_loss: -0.055092759989202024
          total_loss: -0.06446616227428119
          vf_explained_var: -0.8369994163513184
          vf_loss: 0.0017897434978901099
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
  iterations_since_restore: 309
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,309,3514.91,309000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-09-20_10-46-49
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 311
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7340350773599413
          entropy_coeff: 0.009999999999999998
          kl: 0.011231726956947666
          policy_loss: -0.07738058293859164
          total_loss: -0.08707094275289112
          vf_explained_var: -0.2602353096008301
          vf_loss: 0.0015780124011346036
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  iterations_since_restore: 310
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,310,3525.34,310000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-09-20_10-47-00
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 312
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.747229023774465
          entropy_coeff: 0.009999999999999998
          kl: 0.009992715043404803
          policy_loss: -0.07814139065643151
          total_loss: -0.08950086790654395
          vf_explained_var: 0.1460922211408615
          vf_loss: 0.0007106545122547282
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
  iterations_since_restore: 311
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,311,3536,311000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-09-20_10-47-11
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 313
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7449092043770684
          entropy_coeff: 0.009999999999999998
          kl: 0.011831804054317432
          policy_loss: -0.01544135750995742
          total_loss: -0.025353673431608412
          vf_explained_var: -1.0
          vf_loss: 0.001140388004326572
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  iterations_since_restore: 312
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,312,3546.83,312000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-09-20_10-47-21
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 314
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7130690958764818
          entropy_coeff: 0.009999999999999998
          kl: 0.012619835603657098
          policy_loss: -0.05032253202257885
          total_loss: -0.05899775771734615
          vf_explained_var: -1.0
          vf_loss: 0.0016330600237577325
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000
  iterations_since_restore: 313
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,313,3557.2,313000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-09-20_10-47-32
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 315
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.8665450440512763
          entropy_coeff: 0.009999999999999998
          kl: 0.020885423647906107
          policy_loss: -0.022844653824965158
          total_loss: -0.02801751486129231
          vf_explained_var: -0.1751057654619217
          vf_loss: 0.002201728167064074
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000
  iterations_since_restore: 314
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,314,3567.67,314000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-09-20_10-47-42
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 316
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.8186957505014207
          entropy_coeff: 0.009999999999999998
          kl: 0.007806524369608553
          policy_loss: -0.0066756411662532225
          total_loss: -0.01730455056660705
          vf_explained_var: -0.750335156917572
          vf_loss: 0.0012276231181911296
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  iterations_since_restore: 315
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,315,3578.21,315000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-09-20_10-47-53
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 317
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.5697224564022487
          entropy_coeff: 0.009999999999999998
          kl: 0.00828612456800065
          policy_loss: -0.012243078007466263
          total_loss: -0.020328491657144492
          vf_explained_var: -0.7366056442260742
          vf_loss: 0.0008924730613620745
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  iterations_since_restore: 316
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,316,3588.81,316000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-09-20_10-48-04
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 318
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.563564842277103
          entropy_coeff: 0.009999999999999998
          kl: 0.004321402757333913
          policy_loss: -0.00048337398717800774
          total_loss: -0.011978410619000594
          vf_explained_var: -1.0
          vf_loss: 0.0006363219106181835
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
  iterations_since_restore: 317
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,317,3599.53,317000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-09-20_10-48-14
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 319
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.7728585958480836
          entropy_coeff: 0.009999999999999998
          kl: 0.009794852512048516
          policy_loss: -0.08391383373075062
          total_loss: -0.09615049593978459
          vf_explained_var: -0.8206034302711487
          vf_loss: 0.001520527598525708
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  iterations_since_restore: 318
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,318,3610.13,318000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-09-20_10-48-25
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 320
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.8037275910377502
          entropy_coeff: 0.009999999999999998
          kl: 0.011329662377837361
          policy_loss: -0.046173419720596735
          total_loss: -0.058962669213198954
          vf_explained_var: -0.493179053068161
          vf_loss: 0.0006543302620735227
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  iterations_since_restore: 319
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,319,3620.83,319000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-09-20_10-48-36
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 321
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.9194135705629984
          entropy_coeff: 0.009999999999999998
          kl: 0.018963495354736626
          policy_loss: -0.014875595023234685
          total_loss: -0.0252363334927294
          vf_explained_var: -0.22332662343978882
          vf_loss: 0.0011445122091875723
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 320
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,320,3631.71,320000,-0.02,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-09-20_10-48-47
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 322
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.6199654499689737
          entropy_coeff: 0.009999999999999998
          kl: 0.013707445279856106
          policy_loss: -0.0027510821405384274
          total_loss: -0.007260130387213495
          vf_explained_var: 0.5955359935760498
          vf_loss: 0.0061328223007472435
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  iterations_since_restore: 321
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,321,3642.7,321000,-0.03,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-09-20_10-48-58
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 323
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.4816246377097235
          entropy_coeff: 0.009999999999999998
          kl: 0.011028205241972146
          policy_loss: -0.12422055666231446
          total_loss: -0.13320479943520494
          vf_explained_var: 0.24199725687503815
          vf_loss: 0.0013605371744941093
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
  iterations_since_restore: 322
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,322,3653.65,322000,-0.03,0,-2,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-09-20_10-49-09
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 324
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.0810250394874148
          entropy_coeff: 0.009999999999999998
          kl: 0.012230150959284458
          policy_loss: -0.25288767367601395
          total_loss: -0.2578519797987408
          vf_explained_var: -0.02381834201514721
          vf_loss: 0.0008871434429440544
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000
  iterations_since_restore: 323
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,323,3664.36,323000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-09-20_10-49-19
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 325
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.7131269428465101
          entropy_coeff: 0.009999999999999998
          kl: 0.013095579343094244
          policy_loss: -0.06989991416533788
          total_loss: -0.07824290895627604
          vf_explained_var: 0.3987870216369629
          vf_loss: 0.0034785797712781155
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
  iterations_since_restore: 324
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,324,3675.13,324000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-09-20_10-49-30
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 326
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.870251903269026
          entropy_coeff: 0.009999999999999998
          kl: 0.0148079621586018
          policy_loss: -0.07404629927542475
          total_loss: -0.08430833361215062
          vf_explained_var: 0.016492793336510658
          vf_loss: 0.0024364891073976954
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
  iterations_since_restore: 325
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,325,3686.07,325000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-09-20_10-49-41
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 327
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.886760343445672
          entropy_coeff: 0.009999999999999998
          kl: 0.01667941847036529
          policy_loss: -0.036847026439176665
          total_loss: -0.04725706121987767
          vf_explained_var: -0.028647324070334435
          vf_loss: 0.001694775273790583
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  iterations_since_restore: 326
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,326,3697.01,326000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-09-20_10-49-52
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 328
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.8648561795552572
          entropy_coeff: 0.009999999999999998
          kl: 0.013438327318378759
          policy_loss: -0.06949953602419959
          total_loss: -0.08120179822047552
          vf_explained_var: -0.3186487555503845
          vf_loss: 0.0014976335783204477
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000
  iterations_since_restore: 327
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,327,3707.91,327000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-09-20_10-50-03
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 329
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.9612156179216174
          entropy_coeff: 0.009999999999999998
          kl: 0.011606822977985824
          policy_loss: -0.027778062990142238
          total_loss: -0.04112515008697907
          vf_explained_var: -0.14528556168079376
          vf_loss: 0.0015589984877604163
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  iterations_since_restore: 328
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,328,3718.5,328000,-0.01,0,-1,995.87




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-09-20_10-50-31
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 330
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.6474666661686368
          entropy_coeff: 0.009999999999999998
          kl: 0.015174591815599816
          policy_loss: -0.0740691903564665
          total_loss: -0.08183638652165731
          vf_explained_var: 0.05016997829079628
          vf_loss: 0.0025548203266225755
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
  iterations_since_restore: 329
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,329,3746.9,329000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-09-20_10-50-41
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 331
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 2.0169853700531855
          entropy_coeff: 0.009999999999999998
          kl: 0.00859781444803383
          policy_loss: -0.0848570140104534
          total_loss: -0.10067851868872013
          vf_explained_var: -0.6564794182777405
          vf_loss: 0.0008623032371461805
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  iterations_since_restore: 330
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,330,3757.08,330000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-09-20_10-50-52
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 332
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.5329846249686347
          entropy_coeff: 0.009999999999999998
          kl: 0.03949135070864563
          policy_loss: -0.039820499821669526
          total_loss: -0.02841847269899315
          vf_explained_var: 0.15261149406433105
          vf_loss: 0.01071981567527271
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000
  iterations_since_restore: 331
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,331,3767.36,331000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-09-20_10-51-02
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 333
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.979060267077552
          entropy_coeff: 0.009999999999999998
          kl: 0.009016473082263943
          policy_loss: -0.10538320764899253
          total_loss: -0.11816666159364912
          vf_explained_var: -0.2855156660079956
          vf_loss: 0.0015234566325994414
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
  iterations_since_restore: 332
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,332,3777.6,332000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-09-20_10-51-12
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 334
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.688345472017924
          entropy_coeff: 0.009999999999999998
          kl: 0.01019368195598916
          policy_loss: -0.07773552669419183
          total_loss: -0.08618392737375366
          vf_explained_var: -0.199337899684906
          vf_loss: 0.0022354034357704223
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  iterations_since_restore: 333
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,333,3787.72,333000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-09-20_10-51-22
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 335
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6830611824989319
          entropy_coeff: 0.009999999999999998
          kl: 0.009091837603557036
          policy_loss: -0.0014435506322317652
          total_loss: -0.01081117902778917
          vf_explained_var: 0.3212560713291168
          vf_loss: 0.0019334549653447337
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  iterations_since_restore: 334
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,334,3798.03,334000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-09-20_10-51-33
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 336
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.9292768438657124
          entropy_coeff: 0.009999999999999998
          kl: 0.009369979300950978
          policy_loss: -0.08377193667822413
          total_loss: -0.0966934902800454
          vf_explained_var: -0.5132607221603394
          vf_loss: 0.0006725247419025335
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  iterations_since_restore: 335
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,335,3808.36,335000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-09-20_10-51-43
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 337
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6856680909792583
          entropy_coeff: 0.009999999999999998
          kl: 0.011008265454796036
          policy_loss: -0.08503080142868889
          total_loss: -0.08824968057063719
          vf_explained_var: -0.3232773542404175
          vf_loss: 0.006942731771980308
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  iterations_since_restore: 336
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,336,3818.83,336000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-09-20_10-51-54
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 338
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2228057709005145
          entropy_coeff: 0.009999999999999998
          kl: 0.009186776838152848
          policy_loss: -0.09368549858530363
          total_loss: -0.09760434627532959
          vf_explained_var: 0.1517707258462906
          vf_loss: 0.0027219413404559922
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  iterations_since_restore: 337
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,337,3829.15,337000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-09-20_10-52-04
  done: false
  episode_len_mean: 994.45
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 339
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8984643353356256
          entropy_coeff: 0.009999999999999998
          kl: 0.008881942140023035
          policy_loss: -0.07452462166547776
          total_loss: -0.0873049985203478
          vf_explained_var: -0.47122472524642944
          vf_loss: 0.0008023935438056166
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  iterations_since_restore: 338
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,338,3839.39,338000,-0.01,0,-1,994.45


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-09-20_10-52-14
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 340
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2264016065332624
          entropy_coeff: 0.009999999999999998
          kl: 0.005519062960332929
          policy_loss: -0.10677755607499016
          total_loss: -0.11483494308259752
          vf_explained_var: -0.3203792870044708
          vf_loss: 0.0008500132628897619
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  iterations_since_restore: 339
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,339,3849.81,339000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-09-20_10-52-25
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 341
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.025268618265788
          entropy_coeff: 0.009999999999999998
          kl: 0.010576220102798621
          policy_loss: 0.025785790632168452
          total_loss: 0.012534950425227483
          vf_explained_var: -0.5918915867805481
          vf_loss: 0.000569538078788254
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 340
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,340,3860.15,340000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-09-20_10-52-35
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 342
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.661635650528802
          entropy_coeff: 0.009999999999999998
          kl: 0.011228748196677607
          policy_loss: -0.04274010078774558
          total_loss: -0.051156511571672225
          vf_explained_var: -0.3397802412509918
          vf_loss: 0.0013707781695605566
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
  iterations_since_restore: 341
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,341,3870.54,341000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-09-20_10-52-46
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 343
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8306031505266824
          entropy_coeff: 0.009999999999999998
          kl: 0.00899878143239209
          policy_loss: 0.09700491498741838
          total_loss: 0.08511161055001948
          vf_explained_var: -0.008797891438007355
          vf_loss: 0.0009397945150137982
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  iterations_since_restore: 342
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,342,3881.01,342000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-09-20_10-52-56
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 344
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3523992829852634
          entropy_coeff: 0.009999999999999998
          kl: 0.010225516171985147
          policy_loss: -0.10172886732551786
          total_loss: -0.10710753616359499
          vf_explained_var: -0.7604258060455322
          vf_loss: 0.0019263081795846424
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  iterations_since_restore: 343
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,343,3891.41,343000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-09-20_10-53-06
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 345
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7761719796392652
          entropy_coeff: 0.009999999999999998
          kl: 0.008415811103602582
          policy_loss: 0.012974415346980095
          total_loss: 0.002208592463284731
          vf_explained_var: -0.9656311869621277
          vf_loss: 0.001877517158087964
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  iterations_since_restore: 344
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,344,3901.74,344000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-09-20_10-53-17
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 346
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3364394161436293
          entropy_coeff: 0.009999999999999998
          kl: 0.005736783990244179
          policy_loss: 0.03589568022224638
          total_loss: 0.026756393329964743
          vf_explained_var: -0.4243237376213074
          vf_loss: 0.0007360744138067174
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
  iterations_since_restore: 345
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,345,3912.1,345000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-09-20_10-53-27
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 347
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5573357396655612
          entropy_coeff: 0.009999999999999998
          kl: 0.0072809228044818835
          policy_loss: -0.058141652189402114
          total_loss: -0.06822809044064747
          vf_explained_var: 0.010736346244812012
          vf_loss: 0.0010587605570132534
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  iterations_since_restore: 346
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,346,3922.46,346000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-09-20_10-53-37
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 348
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7233409431245592
          entropy_coeff: 0.009999999999999998
          kl: 0.008031253167177689
          policy_loss: -0.09389852492345704
          total_loss: -0.10500289855731858
          vf_explained_var: -0.02353358082473278
          vf_loss: 0.0012445413378170795
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  iterations_since_restore: 347
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,347,3932.74,347000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-09-20_10-53-48
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 349
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3245220455858442
          entropy_coeff: 0.009999999999999998
          kl: 0.013344929213880037
          policy_loss: -0.017315981537103654
          total_loss: -0.021244467629326715
          vf_explained_var: -0.8706501722335815
          vf_loss: 0.001200534233228407
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  iterations_since_restore: 348
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,348,3943.22,348000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-09-20_10-53-58
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 350
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5082452456156412
          entropy_coeff: 0.009999999999999998
          kl: 0.013357533656562066
          policy_loss: -0.018051489111449985
          total_loss: -0.023323331276575723
          vf_explained_var: -0.44739672541618347
          vf_loss: 0.001686745316773239
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  iterations_since_restore: 349
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,349,3953.6,349000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-09-20_10-54-10
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 351
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3890292432573106
          entropy_coeff: 0.009999999999999998
          kl: 0.00835044686705161
          policy_loss: -0.086578376011716
          total_loss: -0.0935351861640811
          vf_explained_var: 0.2791643440723419
          vf_loss: 0.0018548576361758428
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  iterations_since_restore: 350
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,350,3964.92,350000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-09-20_10-54-21
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 352
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.797961163520813
          entropy_coeff: 0.009999999999999998
          kl: 0.0127029634348449
          policy_loss: -0.05308809619810846
          total_loss: -0.06178599341462056
          vf_explained_var: -0.6081445813179016
          vf_loss: 0.001555949338944629
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  iterations_since_restore: 351
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,351,3976.06,351000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-09-20_10-54-32
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 353
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7781921717855664
          entropy_coeff: 0.009999999999999998
          kl: 0.01056886362011518
          policy_loss: -0.07901222080820136
          total_loss: -0.08897791153026952
          vf_explained_var: -0.9851500391960144
          vf_loss: 0.0013883986639686757
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  iterations_since_restore: 352
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,352,3986.72,352000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-09-20_10-54-42
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 354
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2823723395665487
          entropy_coeff: 0.009999999999999998
          kl: 0.010755297319974844
          policy_loss: -0.014616735610697004
          total_loss: -0.018549224237600963
          vf_explained_var: 0.27713140845298767
          vf_loss: 0.00235001179907057
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  iterations_since_restore: 353
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,353,3997.05,353000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-09-20_10-54-52
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 355
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2231032894717322
          entropy_coeff: 0.009999999999999998
          kl: 0.00983837545099217
          policy_loss: -0.09316289871931076
          total_loss: -0.09786164263884227
          vf_explained_var: 0.22345517575740814
          vf_loss: 0.001548726023368848
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  iterations_since_restore: 354
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,354,4007.46,354000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-09-20_10-55-03
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 356
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6621894001960755
          entropy_coeff: 0.009999999999999998
          kl: 0.013192824748083767
          policy_loss: -0.026060366278721228
          total_loss: -0.033734940530525316
          vf_explained_var: -1.0
          vf_loss: 0.0009236289576316873
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
  iterations_since_restore: 355
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,355,4017.79,355000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-09-20_10-55-13
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 357
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7551595316992865
          entropy_coeff: 0.009999999999999998
          kl: 0.008348230566311917
          policy_loss: -0.04212605512390534
          total_loss: -0.05324752912339237
          vf_explained_var: -0.5034589171409607
          vf_loss: 0.0013528449677525916
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
  iterations_since_restore: 356
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,356,4028.06,356000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-09-20_10-55-24
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 358
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7473497986793518
          entropy_coeff: 0.009999999999999998
          kl: 0.01185837671313838
          policy_loss: -0.05701265782117844
          total_loss: -0.06591226856948601
          vf_explained_var: -0.838439404964447
          vf_loss: 0.001361789150784413
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  iterations_since_restore: 357
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,357,4038.72,357000,-0.01,0,-1,995.83


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-09-20_10-55-34
  done: false
  episode_len_mean: 995.83
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 359
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.735439772076077
          entropy_coeff: 0.009999999999999998
          kl: 0.010391633076366327
          policy_loss: -0.09072039205994871
          total_loss: -0.09909183792769909
          vf_explained_var: -0.5399098992347717
          vf_loss: 0.002662905705316613
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  iterations_since_restore: 358
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,358,4049.08,358000,-0.01,0,-1,995.83




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-09-20_10-56-02
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 360
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8907342301474677
          entropy_coeff: 0.009999999999999998
          kl: 0.010381341907389293
          policy_loss: 0.006982209616237216
          total_loss: -0.004506669814387957
          vf_explained_var: -0.6952505111694336
          vf_loss: 0.0011046747896923787
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
  iterations_since_restore: 359
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,359,4077.48,359000,-0.01,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-09-20_10-56-13
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 361
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.77456876039505
          entropy_coeff: 0.009999999999999998
          kl: 0.011820411446950433
          policy_loss: 0.019297995335525935
          total_loss: 0.0149624432126681
          vf_explained_var: -0.45867758989334106
          vf_loss: 0.006221126183582884
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 360
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,360,4087.81,360000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-09-20_10-56-23
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 362
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5793175458908082
          entropy_coeff: 0.009999999999999998
          kl: 0.006647344995759339
          policy_loss: -0.00971400292797221
          total_loss: -0.020585074979397985
          vf_explained_var: -0.8837099671363831
          vf_loss: 0.000879282683147014
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  iterations_since_restore: 361
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,361,4097.8,361000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-09-20_10-56-33
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 363
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5434486091136932
          entropy_coeff: 0.009999999999999998
          kl: 0.011834501570686011
          policy_loss: -0.041211957360307375
          total_loss: -0.04825195976429515
          vf_explained_var: -0.16804921627044678
          vf_loss: 0.0011969049506458558
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  iterations_since_restore: 362
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,362,4107.76,362000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-09-20_10-56-43
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 364
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4342959576182894
          entropy_coeff: 0.009999999999999998
          kl: 0.008332905257242413
          policy_loss: -0.12313013217515416
          total_loss: -0.13071352276537154
          vf_explained_var: -0.08252442628145218
          vf_loss: 0.0016916164229365273
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  iterations_since_restore: 363
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,363,4117.94,363000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-09-20_10-56-53
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 365
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8756020029385885
          entropy_coeff: 0.009999999999999998
          kl: 0.007884557616971656
          policy_loss: 0.0624717615544796
          total_loss: 0.05024895038869646
          vf_explained_var: 0.18485477566719055
          vf_loss: 0.001737929865743758
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  iterations_since_restore: 364
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,364,4128.09,364000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-09-20_10-57-03
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 366
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.9980422695477804
          entropy_coeff: 0.009999999999999998
          kl: 0.010236179668495755
          policy_loss: 0.06879485332303577
          total_loss: 0.05643889158964157
          vf_explained_var: -0.46049371361732483
          vf_loss: 0.0013989594105320673
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  iterations_since_restore: 365
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,365,4138.36,365000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-09-20_10-57-14
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 367
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5923027568393284
          entropy_coeff: 0.009999999999999998
          kl: 0.006588190919110737
          policy_loss: -0.03732351664867666
          total_loss: -0.04833868708875444
          vf_explained_var: -0.7750734090805054
          vf_loss: 0.0009010100743681607
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  iterations_since_restore: 366
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,366,4148.64,366000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-09-20_10-57-24
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 368
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8650979545381334
          entropy_coeff: 0.009999999999999998
          kl: 0.009255500515942493
          policy_loss: -0.10930547018845876
          total_loss: -0.12163748509354061
          vf_explained_var: -0.10215996950864792
          vf_loss: 0.0006899034028821108
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
  iterations_since_restore: 367
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,367,4158.95,367000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-09-20_10-57-35
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 369
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.81657821337382
          entropy_coeff: 0.009999999999999998
          kl: 0.01594108612359205
          policy_loss: 0.1209571444325977
          total_loss: 0.11299212310049268
          vf_explained_var: -0.17095589637756348
          vf_loss: 0.0005056116448637719
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  iterations_since_restore: 368
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,368,4169.3,368000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-09-20_10-57-45
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 370
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6534322222073874
          entropy_coeff: 0.009999999999999998
          kl: 0.015708222759406847
          policy_loss: -0.1476359297004011
          total_loss: -0.15364471334550117
          vf_explained_var: -0.412384957075119
          vf_loss: 0.0009720180723686805
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  iterations_since_restore: 369
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,369,4180.1,369000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-09-20_10-57-56
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 371
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.9465789251857333
          entropy_coeff: 0.009999999999999998
          kl: 0.008276514088337903
          policy_loss: -0.022400380671024324
          total_loss: -0.03509891579548518
          vf_explained_var: 0.16879907250404358
          vf_loss: 0.001733593058048023
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  iterations_since_restore: 370
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,370,4190.96,370000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-09-20_10-58-07
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 372
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8189167592260573
          entropy_coeff: 0.009999999999999998
          kl: 0.010119379068465958
          policy_loss: -0.10545620796167188
          total_loss: -0.11633456285215087
          vf_explained_var: -0.766081690788269
          vf_loss: 0.0011563505763964106
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000
  iterations_since_restore: 371
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,371,4201.26,371000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-09-20_10-58-17
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 373
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8400133861435783
          entropy_coeff: 0.009999999999999998
          kl: 0.01047317064953993
          policy_loss: -0.06404599299033482
          total_loss: -0.07439022431564
          vf_explained_var: -0.9913510680198669
          vf_loss: 0.0016862651087447174
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  iterations_since_restore: 372
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,372,4211.54,372000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-09-20_10-58-28
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 374
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7026996188693577
          entropy_coeff: 0.009999999999999998
          kl: 0.01029703765101182
          policy_loss: -0.057619182537827225
          total_loss: -0.06621214394561119
          vf_explained_var: -0.30418068170547485
          vf_loss: 0.0021715232709539124
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
  iterations_since_restore: 373
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,373,4222.21,373000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-09-20_10-58-39
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 375
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7603898233837552
          entropy_coeff: 0.009999999999999998
          kl: 0.012018649793520296
          policy_loss: -0.16680159771607983
          total_loss: -0.17522145410378773
          vf_explained_var: -0.5845116972923279
          vf_loss: 0.0018744650128711428
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  iterations_since_restore: 374
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,374,4233.24,374000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-09-20_10-58-50
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 376
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7587525950537788
          entropy_coeff: 0.009999999999999998
          kl: 0.005680767946604654
          policy_loss: -0.05706324838101864
          total_loss: -0.07077419641945097
          vf_explained_var: -0.8638802170753479
          vf_loss: 0.0004216126341311287
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  iterations_since_restore: 375
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,375,4244.33,375000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-09-20_10-59-01
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 377
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.890447575516171
          entropy_coeff: 0.009999999999999998
          kl: 0.01162412966954507
          policy_loss: -0.0628697567515903
          total_loss: -0.07408046672741572
          vf_explained_var: -0.7109578251838684
          vf_loss: 0.0006241358028394946
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  iterations_since_restore: 376
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,376,4255.71,376000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-09-20_10-59-12
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 378
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.698745055993398
          entropy_coeff: 0.009999999999999998
          kl: 0.0072645375258118825
          policy_loss: 0.07234637161923779
          total_loss: 0.06052774435116185
          vf_explained_var: -0.6515911221504211
          vf_loss: 0.0007506343815394858
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
  iterations_since_restore: 377
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,377,4266.56,377000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-09-20_10-59-23
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 379
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3418995552592807
          entropy_coeff: 0.009999999999999998
          kl: 0.007076946266950972
          policy_loss: -0.010067561165326172
          total_loss: -0.01781955978108777
          vf_explained_var: -1.0
          vf_loss: 0.0013628986864609436
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  iterations_since_restore: 378
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,378,4277.38,378000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-09-20_10-59-34
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 380
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4580859422683716
          entropy_coeff: 0.009999999999999998
          kl: 0.01683182718490832
          policy_loss: -0.10164823486573166
          total_loss: -0.10426628018418947
          vf_explained_var: -0.1834937483072281
          vf_loss: 0.0017259319455155896
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  iterations_since_restore: 379
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,379,4288.85,379000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-09-20_10-59-45
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 381
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.576186606619093
          entropy_coeff: 0.009999999999999998
          kl: 0.01370494925620815
          policy_loss: -0.0523650118874179
          total_loss: -0.05856644759575526
          vf_explained_var: 0.1879500448703766
          vf_loss: 0.0012252719536061502
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  iterations_since_restore: 380
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,380,4299.98,380000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-09-20_10-59-57
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 382
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4651257064607408
          entropy_coeff: 0.009999999999999998
          kl: 0.014279855728471761
          policy_loss: -0.04047796502709389
          total_loss: -0.04557227368156115
          vf_explained_var: -0.8913498520851135
          vf_loss: 0.0008721407282994025
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  iterations_since_restore: 381
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,381,4311.31,381000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-09-20_11-00-08
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 383
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5453392095035976
          entropy_coeff: 0.009999999999999998
          kl: 0.009587442728908612
          policy_loss: -0.07194218726621734
          total_loss: -0.08089163576563199
          vf_explained_var: -0.857505202293396
          vf_loss: 0.0006729960040603247
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  iterations_since_restore: 382
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,382,4322.67,382000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-09-20_11-00-20
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 384
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3702028367254468
          entropy_coeff: 0.009999999999999998
          kl: 0.013994586439810617
          policy_loss: -0.013956234024630653
          total_loss: -0.017288256312410037
          vf_explained_var: -0.06564337760210037
          vf_loss: 0.0018586957293640202
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  iterations_since_restore: 383
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,383,4334.29,383000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-09-20_11-00-31
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 385
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.47712418768141
          entropy_coeff: 0.009999999999999998
          kl: 0.014529751142145544
          policy_loss: 0.018669214472174643
          total_loss: 0.013794914922780462
          vf_explained_var: -0.18208403885364532
          vf_loss: 0.0010601513870319144
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
  iterations_since_restore: 384
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,384,4345.54,384000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-09-20_11-00-42
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 386
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.758509369691213
          entropy_coeff: 0.009999999999999998
          kl: 0.010043548641614209
          policy_loss: -0.010000590317779117
          total_loss: -0.02102396645479732
          vf_explained_var: -0.3671807646751404
          vf_loss: 0.0004533737369153338
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  iterations_since_restore: 385
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,385,4356.75,385000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-09-20_11-00-54
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 387
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5471841428014967
          entropy_coeff: 0.009999999999999998
          kl: 0.007124802488276948
          policy_loss: -0.11043652420242628
          total_loss: -0.12087535547713439
          vf_explained_var: -0.1342013031244278
          vf_loss: 0.0006998061901968968
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  iterations_since_restore: 386
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,386,4367.97,386000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-09-20_11-01-05
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 388
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3687037693129644
          entropy_coeff: 0.009999999999999998
          kl: 0.017023996465155337
          policy_loss: 0.03235841426584456
          total_loss: 0.02982647998465432
          vf_explained_var: -0.45859605073928833
          vf_loss: 0.0008013490315837165
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  iterations_since_restore: 387
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,387,4379.25,387000,-0.02,0,-1,995.67


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-09-20_11-01-16
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 389
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2429530554347568
          entropy_coeff: 0.009999999999999998
          kl: 0.012819592336662211
          policy_loss: 0.006031782883736823
          total_loss: 0.003971727440754572
          vf_explained_var: -1.0
          vf_loss: 0.002572779753892165
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  iterations_since_restore: 388
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,388,4390.4,388000,-0.02,0,-1,995.67




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-09-20_11-01-44
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 390
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2133811526828342
          entropy_coeff: 0.009999999999999998
          kl: 0.008775719141279205
          policy_loss: -0.02657515984028578
          total_loss: -0.03184930593189266
          vf_explained_var: -0.7731363773345947
          vf_loss: 0.0015223962778691202
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  iterations_since_restore: 389
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,389,4418.33,389000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-09-20_11-01-56
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 391
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2370854437351226
          entropy_coeff: 0.009999999999999998
          kl: 0.010666815849498738
          policy_loss: -0.030886611052685312
          total_loss: -0.03579764482047823
          vf_explained_var: 0.7969749569892883
          vf_loss: 0.0009724141632129128
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  iterations_since_restore: 390
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,390,4430.71,390000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-09-20_11-02-07
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 392
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 0.9094038936826918
          entropy_coeff: 0.009999999999999998
          kl: 0.009040016088644463
          policy_loss: -0.2090684473514557
          total_loss: -0.21151999996768103
          vf_explained_var: 0.7141673564910889
          vf_loss: 0.0011444733154753017
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  iterations_since_restore: 391
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,391,4441.25,391000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-09-20_11-02-18
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 393
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5169032520718044
          entropy_coeff: 0.009999999999999998
          kl: 0.011825158530888362
          policy_loss: -0.1346972319814894
          total_loss: -0.14109491176075406
          vf_explained_var: 0.2549324929714203
          vf_loss: 0.0015794584585819393
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  iterations_since_restore: 392
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,392,4451.76,392000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-09-20_11-02-28
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 394
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4576893223656548
          entropy_coeff: 0.009999999999999998
          kl: 0.013297891310159413
          policy_loss: -0.030095245213144356
          total_loss: -0.035065915756341486
          vf_explained_var: -0.27177894115448
          vf_loss: 0.0015186309746544188
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  iterations_since_restore: 393
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,393,4462.13,393000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-09-20_11-02-38
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 395
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 0.7967515624231762
          entropy_coeff: 0.009999999999999998
          kl: 0.008785422380175748
          policy_loss: -0.04593408033251763
          total_loss: -0.04745233034094175
          vf_explained_var: -0.04354729503393173
          vf_loss: 0.0011060960744442936
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  iterations_since_restore: 394
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,394,4472.36,394000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-09-20_11-02-49
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 396
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6347625851631165
          entropy_coeff: 0.009999999999999998
          kl: 0.008261552199068333
          policy_loss: -0.048330842910541426
          total_loss: -0.05867074992921617
          vf_explained_var: 0.16588488221168518
          vf_loss: 0.0009831582163719254
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  iterations_since_restore: 395
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,395,4482.86,395000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-09-20_11-02-59
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 397
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3215034001403385
          entropy_coeff: 0.009999999999999998
          kl: 0.011403875673643189
          policy_loss: -0.06406602569752269
          total_loss: -0.06959373176925712
          vf_explained_var: -0.06769371777772903
          vf_loss: 0.0007516495349894588
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  iterations_since_restore: 396
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,396,4493.56,396000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-09-20_11-03-10
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 398
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7079002406862047
          entropy_coeff: 0.009999999999999998
          kl: 0.008652889392545287
          policy_loss: -0.012519898141423862
          total_loss: -0.023499952008326847
          vf_explained_var: 0.25180870294570923
          vf_loss: 0.0008363814371275819
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  iterations_since_restore: 397
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,397,4503.97,397000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-09-20_11-03-21
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 399
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8051272922092014
          entropy_coeff: 0.009999999999999998
          kl: 0.008137153588573495
          policy_loss: 0.03411690750055843
          total_loss: 0.02182379456029998
          vf_explained_var: 0.23820200562477112
          vf_loss: 0.0008092565966459612
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
  iterations_since_restore: 398
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,398,4514.57,398000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-09-20_11-03-31
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 400
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3532179491387473
          entropy_coeff: 0.009999999999999998
          kl: 0.009864428789744138
          policy_loss: -0.08169915891355939
          total_loss: -0.08630273408359951
          vf_explained_var: -0.3608376085758209
          vf_loss: 0.0029291947234822953
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  iterations_since_restore: 399
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,399,4525.16,399000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-09-20_11-03-42
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 401
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3256505290667215
          entropy_coeff: 0.009999999999999998
          kl: 0.008971468343945056
          policy_loss: -0.0934809274557564
          total_loss: -0.09987500326500999
          vf_explained_var: -0.3493940234184265
          vf_loss: 0.0014061078991896162
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  iterations_since_restore: 400
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,400,4536.1,400000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-09-20_11-03-53
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 402
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6745774428049722
          entropy_coeff: 0.009999999999999998
          kl: 0.009227312502087375
          policy_loss: 0.009801766234967443
          total_loss: -0.0006875007930729124
          vf_explained_var: 0.3041757643222809
          vf_loss: 0.0006445851849599017
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  iterations_since_restore: 401
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,401,4546.89,401000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-09-20_11-04-04
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 403
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.1103875577449798
          entropy_coeff: 0.009999999999999998
          kl: 0.01695779271245641
          policy_loss: -0.009511200586954753
          total_loss: -0.009001850874887573
          vf_explained_var: 0.2311231791973114
          vf_loss: 0.0012997355945925746
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  iterations_since_restore: 402
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,402,4557.8,402000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-09-20_11-04-15
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 404
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.652472554312812
          entropy_coeff: 0.009999999999999998
          kl: 0.017529705176508672
          policy_loss: -0.0013937959240542517
          total_loss: -0.006309830314583249
          vf_explained_var: -0.1293349415063858
          vf_loss: 0.0009473741164482716
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iterations_since_restore: 403
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,403,4568.44,403000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-09-20_11-04-25
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 405
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3773672395282321
          entropy_coeff: 0.009999999999999998
          kl: 0.01115505441921398
          policy_loss: -0.07746232002973556
          total_loss: -0.08238969660467571
          vf_explained_var: -0.19330334663391113
          vf_loss: 0.0020619491315705496
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iterations_since_restore: 404
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,404,4579.02,404000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-09-20_11-04-36
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 406
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.259493159585529
          entropy_coeff: 0.009999999999999998
          kl: 0.013260195966479
          policy_loss: -0.11613802661498387
          total_loss: -0.11955680168337292
          vf_explained_var: 0.029379654675722122
          vf_loss: 0.0011114920198451728
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iterations_since_restore: 405
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,405,4589.46,405000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-09-20_11-04-46
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 407
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 0.9884219725926717
          entropy_coeff: 0.009999999999999998
          kl: 0.00784265242475719
          policy_loss: -0.051843269252114826
          total_loss: -0.056457676904069054
          vf_explained_var: 0.3398872911930084
          vf_loss: 0.000500021998595912
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  iterations_since_restore: 406
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,406,4599.93,406000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-09-20_11-04-57
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 408
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.167760557598538
          entropy_coeff: 0.009999999999999998
          kl: 0.005384229719276945
          policy_loss: 0.0005713513327969445
          total_loss: -0.007326686878999075
          vf_explained_var: 0.19990603625774384
          vf_loss: 0.000504954400498213
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  iterations_since_restore: 407
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,407,4610.64,407000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-09-20_11-05-07
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 409
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8669059342808194
          entropy_coeff: 0.009999999999999998
          kl: 0.012034675517477132
          policy_loss: -0.10573495075934464
          total_loss: -0.11567112861408127
          vf_explained_var: 0.20938527584075928
          vf_loss: 0.001413561669889734
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  iterations_since_restore: 408
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,408,4621.28,408000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-09-20_11-05-19
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 410
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2762946976555718
          entropy_coeff: 0.009999999999999998
          kl: 0.010901187007898017
          policy_loss: -0.08544086451745696
          total_loss: -0.09029326368537215
          vf_explained_var: 0.09157675504684448
          vf_loss: 0.001280596288658368
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iterations_since_restore: 409
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,409,4632.89,409000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-09-20_11-05-30
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 411
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.432637596130371
          entropy_coeff: 0.009999999999999998
          kl: 0.009909956865298655
          policy_loss: -0.0731591092215644
          total_loss: -0.07979412604537275
          vf_explained_var: 0.11775247007608414
          vf_loss: 0.001664261203000529
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  iterations_since_restore: 410
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,410,4643.9,410000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-09-20_11-05-41
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 412
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.417228321896659
          entropy_coeff: 0.009999999999999998
          kl: 0.006183664179238511
          policy_loss: -0.06802652205030123
          total_loss: -0.07788567981786199
          vf_explained_var: 0.6598657369613647
          vf_loss: 0.0005523054439335182
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  iterations_since_restore: 411
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,411,4655.19,411000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-09-20_11-05-52
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 413
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.689107678996192
          entropy_coeff: 0.009999999999999998
          kl: 0.008312550374919386
          policy_loss: -0.08824391981793775
          total_loss: -0.09932836931612757
          vf_explained_var: 0.27808326482772827
          vf_loss: 0.0007510491234522002
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  iterations_since_restore: 412
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,412,4665.96,412000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-09-20_11-06-03
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 414
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6587088346481322
          entropy_coeff: 0.009999999999999998
          kl: 0.006885702601343545
          policy_loss: -0.16060699510077636
          total_loss: -0.1726071504669057
          vf_explained_var: 0.7502858638763428
          vf_loss: 0.000399143395204899
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  iterations_since_restore: 413
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,413,4676.83,413000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-09-20_11-06-14
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 415
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.1192789879110125
          entropy_coeff: 0.009999999999999998
          kl: 0.007649259616130561
          policy_loss: -0.05247427796324094
          total_loss: -0.058100932236346936
          vf_explained_var: 0.20344725251197815
          vf_loss: 0.0009139638248598203
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  iterations_since_restore: 414
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,414,4687.71,414000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-09-20_11-06-25
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 416
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.1927681763966878
          entropy_coeff: 0.009999999999999998
          kl: 0.0098227387056128
          policy_loss: -0.04625847459667259
          total_loss: -0.05147730078962114
          vf_explained_var: 0.265259325504303
          vf_loss: 0.0007347996893157768
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
  iterations_since_restore: 415
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,415,4698.95,415000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-09-20_11-06-37
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 417
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4034481830067105
          entropy_coeff: 0.009999999999999998
          kl: 0.013080820550232157
          policy_loss: 0.010633154627349642
          total_loss: 0.005208550973070992
          vf_explained_var: -0.2423780858516693
          vf_loss: 0.0006543083662513203
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  iterations_since_restore: 416
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,416,4710.22,416000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-09-20_11-06-48
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 418
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4527569995986092
          entropy_coeff: 0.009999999999999998
          kl: 0.00751344403603969
          policy_loss: -0.03278137466145886
          total_loss: -0.04191116522997618
          vf_explained_var: -0.5201424360275269
          vf_loss: 0.0008282080341208105
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
  iterations_since_restore: 417
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,417,4721.19,417000,-0.02,0,-1,995.68


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-09-20_11-06-58
  done: false
  episode_len_mean: 995.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 419
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.425469586584303
          entropy_coeff: 0.009999999999999998
          kl: 0.013247024501062852
          policy_loss: -0.14589108646743829
          total_loss: -0.15130216624173853
          vf_explained_var: -0.26200976967811584
          vf_loss: 0.0007869607379284894
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
  iterations_since_restore: 418
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,418,4731.65,418000,-0.02,0,-1,995.68




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-09-20_11-07-28
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 420
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 0.8139725353982713
          entropy_coeff: 0.009999999999999998
          kl: 0.007609994066422211
          policy_loss: -0.1495703445540534
          total_loss: -0.1527299698856142
          vf_explained_var: 0.559560239315033
          vf_loss: 0.0003518083737516362
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  iterations_since_restore: 419
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,419,4761.26,419000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-09-20_11-07-38
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 421
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7292224314477709
          entropy_coeff: 0.009999999999999998
          kl: 0.013793181667857684
          policy_loss: -0.21168670116199387
          total_loss: -0.2193331956035561
          vf_explained_var: 0.09749246388673782
          vf_loss: 0.0012569141710021845
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
  iterations_since_restore: 420
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,420,4771.79,420000,-0.02,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-09-20_11-07-49
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 422
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6930737045076159
          entropy_coeff: 0.009999999999999998
          kl: 0.014045076761440543
          policy_loss: 0.0022001500758859847
          total_loss: -0.0054042495787143706
          vf_explained_var: -0.13895238935947418
          vf_loss: 0.000784318180457275
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
  iterations_since_restore: 421
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,421,4782.29,421000,-0.01,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-09-20_11-08-00
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 423
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4664870613151126
          entropy_coeff: 0.009999999999999998
          kl: 0.009425834633947734
          policy_loss: -0.03598168028725518
          total_loss: -0.04410423512260119
          vf_explained_var: 0.05058125779032707
          vf_loss: 0.0008096538066941623
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
  iterations_since_restore: 422
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,422,4793.08,422000,-0.01,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-09-20_11-08-10
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 424
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.2749207099278768
          entropy_coeff: 0.009999999999999998
          kl: 0.013327754712858998
          policy_loss: -0.0726699552188317
          total_loss: -0.07686750143766403
          vf_explained_var: 0.016486018896102905
          vf_loss: 0.00044590362845661324
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  iterations_since_restore: 423
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,423,4803.58,423000,-0.01,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-09-20_11-08-21
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 425
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4732457445727454
          entropy_coeff: 0.009999999999999998
          kl: 0.010428423383472957
          policy_loss: 0.05554707853330506
          total_loss: 0.04769387145837148
          vf_explained_var: -0.957500696182251
          vf_loss: 0.0005368288117501329
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  iterations_since_restore: 424
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,424,4814.62,424000,-0.01,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-09-20_11-08-32
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 426
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3386192017131382
          entropy_coeff: 0.009999999999999998
          kl: 0.009828692361820442
          policy_loss: -0.06313189657198058
          total_loss: -0.06982550927334362
          vf_explained_var: -0.3742022216320038
          vf_loss: 0.0007149059965740889
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  iterations_since_restore: 425
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,425,4825.15,425000,-0.01,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-09-20_11-08-43
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 427
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.581328304608663
          entropy_coeff: 0.009999999999999998
          kl: 0.008702014538066965
          policy_loss: 0.038520285238822304
          total_loss: 0.02957984076605903
          vf_explained_var: -0.3829003572463989
          vf_loss: 0.0015803961924070286
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000
  iterations_since_restore: 426
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,426,4836.14,426000,-0.01,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-09-20_11-08-54
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 428
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.3367787506845263
          entropy_coeff: 0.009999999999999998
          kl: 0.010031366461423295
          policy_loss: -0.07271979277332624
          total_loss: -0.07867603302001953
          vf_explained_var: 0.17331212759017944
          vf_loss: 0.0013106131754789707
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
  iterations_since_restore: 427
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,427,4847.19,427000,-0.01,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-09-20_11-09-05
  done: false
  episode_len_mean: 994.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 429
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4753958781560261
          entropy_coeff: 0.009999999999999998
          kl: 0.007316889296583421
          policy_loss: -0.008224940569036537
          total_loss: -0.018054343428876664
          vf_explained_var: -0.8383804559707642
          vf_loss: 0.0004745255914814253
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  iterations_since_restore: 428
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,428,4857.98,428000,-0.01,0,-1,994.3


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-09-20_11-09-15
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 430
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7550573574172126
          entropy_coeff: 0.009999999999999998
          kl: 0.01223929939633166
          policy_loss: 0.004054866317245696
          total_loss: -0.004940741322934627
          vf_explained_var: -0.6312506794929504
          vf_loss: 0.0011111976003222582
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
  iterations_since_restore: 429
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,429,4868.58,429000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-09-20_11-09-26
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 431
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5328472640779283
          entropy_coeff: 0.009999999999999998
          kl: 0.00964664938759512
          policy_loss: -0.03738418229752117
          total_loss: -0.04648125130269262
          vf_explained_var: -0.9391605854034424
          vf_loss: 0.00036444753972722944
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000
  iterations_since_restore: 430
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,430,4879.35,430000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-09-20_11-09-37
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 432
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.4319037437438964
          entropy_coeff: 0.009999999999999998
          kl: 0.008175828546118345
          policy_loss: -0.009058454881111781
          total_loss: -0.01759073966079288
          vf_explained_var: -0.5090411901473999
          vf_loss: 0.0008143299808984415
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000
  iterations_since_restore: 431
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,431,4890.22,431000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-09-20_11-09-48
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 433
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6665477196375529
          entropy_coeff: 0.009999999999999998
          kl: 0.011537237838031681
          policy_loss: -0.03643635271324052
          total_loss: -0.04488609561489688
          vf_explained_var: 0.20504429936408997
          vf_loss: 0.001198945902999387
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
  iterations_since_restore: 432
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,432,4900.96,432000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-09-20_11-09-59
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 434
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6020319037967259
          entropy_coeff: 0.009999999999999998
          kl: 0.006021012617177421
          policy_loss: -0.01230623938350214
          total_loss: -0.023952460930579237
          vf_explained_var: -0.7638553977012634
          vf_loss: 0.0007122015845703168
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000
  iterations_since_restore: 433
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,433,4911.8,433000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-09-20_11-10-09
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 435
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8323963589138454
          entropy_coeff: 0.009999999999999998
          kl: 0.009990213722108225
          policy_loss: -0.06859484650194644
          total_loss: -0.07940442003309726
          vf_explained_var: 0.022949734702706337
          vf_loss: 0.0014384841837454588
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  iterations_since_restore: 434
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,434,4922.59,434000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-09-20_11-10-20
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 436
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.989873190720876
          entropy_coeff: 0.009999999999999998
          kl: 0.010708440773289772
          policy_loss: -0.012771044505967034
          total_loss: -0.025121561106708316
          vf_explained_var: -0.31811249256134033
          vf_loss: 0.0010354940339715945
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
  iterations_since_restore: 435
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,435,4933.32,435000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-09-20_11-10-31
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 437
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.960682651731703
          entropy_coeff: 0.009999999999999998
          kl: 0.006685728261713445
          policy_loss: -0.12119293734431266
          total_loss: -0.13452457408938143
          vf_explained_var: 0.10326720029115677
          vf_loss: 0.0022090222277458653
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  iterations_since_restore: 436
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,436,4943.7,436000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-09-20_11-10-41
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 438
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.650204144583808
          entropy_coeff: 0.009999999999999998
          kl: 0.010432813227267662
          policy_loss: -0.0002237795541683833
          total_loss: -0.009354051036967171
          vf_explained_var: -0.9031399488449097
          vf_loss: 0.001026677913129485
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  iterations_since_restore: 437
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,437,4954.08,437000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-09-20_11-10-52
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 439
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.6976191732618544
          entropy_coeff: 0.009999999999999998
          kl: 0.006688354948971047
          policy_loss: 0.00043340639935599435
          total_loss: -0.01200753812574678
          vf_explained_var: -1.0
          vf_loss: 0.0004674837452411238
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000
  iterations_since_restore: 438
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,438,4964.7,438000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-09-20_11-11-02
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 440
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7984605418311226
          entropy_coeff: 0.009999999999999998
          kl: 0.00808868472791957
          policy_loss: 0.008156573741386335
          total_loss: -0.004434058453059859
          vf_explained_var: -0.33373865485191345
          vf_loss: 0.00047454891949503993
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  iterations_since_restore: 439
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,439,4975.19,439000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-09-20_11-11-13
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 441
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.5800835569699605
          entropy_coeff: 0.009999999999999998
          kl: 0.012894108406465874
          policy_loss: 0.011280825568570031
          total_loss: 0.003986479341983795
          vf_explained_var: -0.41894498467445374
          vf_loss: 0.0006644728811807
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  iterations_since_restore: 440
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,440,4985.89,440000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-09-20_11-11-23
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 442
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7116729789310032
          entropy_coeff: 0.009999999999999998
          kl: 0.013220297754029097
          policy_loss: 0.03296915123032199
          total_loss: 0.024209677883320384
          vf_explained_var: -1.0
          vf_loss: 0.0003168557663836206
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  iterations_since_restore: 441
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,441,4996.46,441000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-09-20_11-11-34
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 443
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.946662535932329
          entropy_coeff: 0.009999999999999998
          kl: 0.010974512385262248
          policy_loss: -0.06580792433685727
          total_loss: -0.07743279097808732
          vf_explained_var: -0.16249491274356842
          vf_loss: 0.0011672150751110167
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
  iterations_since_restore: 442
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,442,5007.28,442000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-09-20_11-11-45
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 444
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.7948654214541118
          entropy_coeff: 0.009999999999999998
          kl: 0.013758280261806435
          policy_loss: -0.036990583708716766
          total_loss: -0.046279282743732136
          vf_explained_var: -0.967132031917572
          vf_loss: 0.00029236245981236505
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  iterations_since_restore: 443
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,443,5018.2,443000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-09-20_11-11-56
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 445
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.600283020072513
          entropy_coeff: 0.009999999999999998
          kl: 0.008600725005449188
          policy_loss: -0.04099046836296717
          total_loss: -0.05150047540664673
          vf_explained_var: -0.9897683262825012
          vf_loss: 0.00026198224050959755
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  iterations_since_restore: 444
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,444,5029.08,444000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-09-20_11-12-08
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 446
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.1302122672398887
          entropy_coeff: 0.009999999999999998
          kl: 0.012654704353608616
          policy_loss: -0.10694534260158738
          total_loss: -0.11969716443369786
          vf_explained_var: -0.04102559760212898
          vf_loss: 0.0008538890005891314
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
  iterations_since_restore: 445
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,445,5040.62,445000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-09-20_11-12-19
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 447
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.0370157215330336
          entropy_coeff: 0.009999999999999998
          kl: 0.0019474752666860558
          policy_loss: -0.13359069791105058
          total_loss: -0.1520075872540474
          vf_explained_var: -0.5719944834709167
          vf_loss: 0.000768840851702003
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  iterations_since_restore: 446
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,446,5051.6,446000,-0.01,0,-1,995.72


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-09-20_11-12-30
  done: false
  episode_len_mean: 995.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 448
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.634993741247389
          entropy_coeff: 0.009999999999999998
          kl: 0.006761988914947716
          policy_loss: -0.00623565970454365
          total_loss: -0.006309724330074257
          vf_explained_var: -0.6526934504508972
          vf_loss: 0.014219599713720122
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  iterations_since_restore: 447
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,447,5062.39,447000,-0.02,0,-1,995.72




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-09-20_11-12-58
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 2
  episodes_total: 450
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6737669547398886
          entropy_coeff: 0.009999999999999998
          kl: 0.011879341606902629
          policy_loss: 0.004516191884047455
          total_loss: -0.008212958741933108
          vf_explained_var: -0.8141162395477295
          vf_loss: 0.0003960942332115438
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  iterations_since_restore: 448
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,448,5090.86,448000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-09-20_11-13-10
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 451
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.683262778653039
          entropy_coeff: 0.009999999999999998
          kl: 0.012714124404220841
          policy_loss: -0.045427487588798006
          total_loss: -0.05806313932666348
          vf_explained_var: -0.9708525538444519
          vf_loss: 0.0003307004680714777
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  iterations_since_restore: 449
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,449,5102.85,449000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-09-20_11-13-20
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 452
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8981020463837517
          entropy_coeff: 0.009999999999999998
          kl: 0.009507253414527739
          policy_loss: -0.03293915684852335
          total_loss: -0.04822647555006875
          vf_explained_var: -0.3064396381378174
          vf_loss: 0.0008026122936927196
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  iterations_since_restore: 450
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,450,5112.99,450000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-09-20_11-13-30
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 453
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7292574485143026
          entropy_coeff: 0.009999999999999998
          kl: 0.006826119837555211
          policy_loss: -0.03868531059059832
          total_loss: -0.05359688670270973
          vf_explained_var: -1.0
          vf_loss: 0.0003052236104849726
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000
  iterations_since_restore: 451
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,451,5123.27,451000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-09-20_11-13-41
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 454
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 2.0598373227649267
          entropy_coeff: 0.009999999999999998
          kl: 0.01578591001962923
          policy_loss: 0.010771971113151974
          total_loss: -0.004434583129154311
          vf_explained_var: 0.13150723278522491
          vf_loss: 0.0005914354690402333
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  iterations_since_restore: 452
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,452,5133.45,452000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-09-20_11-13-51
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 455
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.734063282277849
          entropy_coeff: 0.009999999999999998
          kl: 0.008865022324724907
          policy_loss: -0.0020436628411213556
          total_loss: -0.016210566750831074
          vf_explained_var: -1.0
          vf_loss: 0.0004779382578110219
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000
  iterations_since_restore: 453
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,453,5143.91,453000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-09-20_11-14-02
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 456
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6906702796618143
          entropy_coeff: 0.009999999999999998
          kl: 0.008997525630371426
          policy_loss: 0.0053117964416742325
          total_loss: -0.00856664946509732
          vf_explained_var: -0.9993357062339783
          vf_loss: 0.0002921747992837077
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  iterations_since_restore: 454
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,454,5154.3,454000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-09-20_11-14-12
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 457
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6734193232324388
          entropy_coeff: 0.009999999999999998
          kl: 0.010423090297489177
          policy_loss: 0.00741298809233639
          total_loss: -0.005651730971617831
          vf_explained_var: -1.0
          vf_loss: 0.0004998862056203911
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  iterations_since_restore: 455
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,455,5164.83,455000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-09-20_11-14-23
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 458
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7095012373394436
          entropy_coeff: 0.009999999999999998
          kl: 0.00708403366116972
          policy_loss: -0.07431302136845058
          total_loss: -0.08906440018779702
          vf_explained_var: -0.999800443649292
          vf_loss: 0.0001894273125799373
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
  iterations_since_restore: 456
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,456,5175.24,456000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-09-20_11-14-33
  done: false
  episode_len_mean: 994.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 459
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6598049230045742
          entropy_coeff: 0.009999999999999998
          kl: 0.011444420374910318
          policy_loss: -0.0554173581302166
          total_loss: -0.06818350938459238
          vf_explained_var: -0.8252612352371216
          vf_loss: 0.0003517284436384216
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 457000
  iterations_since_restore: 457
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,457,5185.75,457000,-0.02,0,-1,994.34


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-09-20_11-14-44
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 460
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8559256937768724
          entropy_coeff: 0.009999999999999998
          kl: 0.014816495131248135
          policy_loss: 0.012583499236239328
          total_loss: -0.0010045219626691607
          vf_explained_var: -0.2798888683319092
          vf_loss: 0.00046564099514701717
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000
  iterations_since_restore: 458
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,458,5196.2,458000,-0.02,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-09-20_11-14-54
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 461
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.747800894578298
          entropy_coeff: 0.009999999999999998
          kl: 0.015445979096165156
          policy_loss: -0.09592320397496223
          total_loss: -0.10802764271696409
          vf_explained_var: -0.2018609195947647
          vf_loss: 0.0006765577968003021
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000
  iterations_since_restore: 459
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,459,5206.57,459000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-09-20_11-15-05
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 462
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7334299471643235
          entropy_coeff: 0.009999999999999998
          kl: 0.008133758352264467
          policy_loss: -0.05019074127905899
          total_loss: -0.06469245750664009
          vf_explained_var: -0.442300409078598
          vf_loss: 0.0003591654744620124
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
  iterations_since_restore: 460
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,460,5217.07,460000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-09-20_11-15-15
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 463
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7123467644055685
          entropy_coeff: 0.009999999999999998
          kl: 0.015515050658277553
          policy_loss: -0.12357410291830699
          total_loss: -0.13549091120560963
          vf_explained_var: -0.07998088002204895
          vf_loss: 0.0004886427546605571
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000
  iterations_since_restore: 461
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,461,5227.52,461000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-09-20_11-15-25
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 464
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6959128975868225
          entropy_coeff: 0.009999999999999998
          kl: 0.011361611184698549
          policy_loss: -0.08504523684581121
          total_loss: -0.09721843898296356
          vf_explained_var: -0.1505029946565628
          vf_loss: 0.0013309400832642697
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 462000
  iterations_since_restore: 462
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,462,5237.97,462000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-09-20_11-15-36
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 465
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7481434928046333
          entropy_coeff: 0.009999999999999998
          kl: 0.007800079223927901
          policy_loss: -0.10114671231971847
          total_loss: -0.11584152405460675
          vf_explained_var: -0.5631569623947144
          vf_loss: 0.00041467044179120823
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 463000
  iterations_since_restore: 463
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,463,5248.43,463000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-09-20_11-15-46
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 466
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7640856928295559
          entropy_coeff: 0.009999999999999998
          kl: 0.010108653000673219
          policy_loss: -0.05413470034384065
          total_loss: -0.06835486601210303
          vf_explained_var: -0.9083054661750793
          vf_loss: 0.00034671848099808103
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 464000
  iterations_since_restore: 464
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,464,5258.97,464000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-09-20_11-15-57
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 467
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8181776483853658
          entropy_coeff: 0.009999999999999998
          kl: 0.008807931640520966
          policy_loss: -0.07644647728237841
          total_loss: -0.09140252992510796
          vf_explained_var: -0.7753193378448486
          vf_loss: 0.0005472941432445724
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465000
  iterations_since_restore: 465
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,465,5269.62,465000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-09-20_11-16-08
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 468
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8178137395117018
          entropy_coeff: 0.009999999999999998
          kl: 0.009455422772329945
          policy_loss: 0.0329044450695316
          total_loss: 0.017801982723176478
          vf_explained_var: -0.6287462711334229
          vf_loss: 0.00020034787521581166
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 466000
  iterations_since_restore: 466
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,466,5280.18,466000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-09-20_11-16-18
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 469
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.978594920370314
          entropy_coeff: 0.009999999999999998
          kl: 0.00859585086964548
          policy_loss: 0.00346029092454248
          total_loss: -0.012948209916551908
          vf_explained_var: -0.47386741638183594
          vf_loss: 0.0007635072546286715
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467000
  iterations_since_restore: 467
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,467,5290.35,467000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-09-20_11-16-28
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 470
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8151022950808207
          entropy_coeff: 0.009999999999999998
          kl: 0.011185516016381274
          policy_loss: -0.11814996939566401
          total_loss: -0.13234559413459565
          vf_explained_var: -0.9530873894691467
          vf_loss: 0.0005539603676879779
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 468000
  iterations_since_restore: 468
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,468,5300.83,468000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-09-20_11-16-39
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 471
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7049630999565124
          entropy_coeff: 0.009999999999999998
          kl: 0.019142494334334344
          policy_loss: -0.06589969461783766
          total_loss: -0.07483141608536244
          vf_explained_var: -0.45830488204956055
          vf_loss: 0.0022968110327686495
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000
  iterations_since_restore: 469
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,469,5311.08,469000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-09-20_11-16-49
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 472
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.7892375694380867
          entropy_coeff: 0.009999999999999998
          kl: 0.009544922374260563
          policy_loss: -0.03450334332883358
          total_loss: -0.0490371404784835
          vf_explained_var: -0.9954811334609985
          vf_loss: 0.00045603531697351073
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000
  iterations_since_restore: 470
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,470,5321.74,470000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-09-20_11-17-00
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 473
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8150219149059719
          entropy_coeff: 0.009999999999999998
          kl: 0.012021553367390069
          policy_loss: -0.01869225891100036
          total_loss: -0.0325640960286061
          vf_explained_var: -1.0
          vf_loss: 0.000622711110918317
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 471000
  iterations_since_restore: 471
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,471,5332.38,471000,-0.01,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-09-20_11-17-11
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 474
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8699987106853062
          entropy_coeff: 0.009999999999999998
          kl: 0.011321325460206496
          policy_loss: -0.0344854135480192
          total_loss: -0.040011297166347506
          vf_explained_var: -0.5715206861495972
          vf_loss: 0.009731368813016969
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 472000
  iterations_since_restore: 472
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,472,5343.26,472000,-0.02,0,-1,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-09-20_11-17-22
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 475
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8411512639787462
          entropy_coeff: 0.009999999999999998
          kl: 0.01684482735623843
          policy_loss: 0.05025360302792655
          total_loss: 0.07607605341407987
          vf_explained_var: -0.14601878821849823
          vf_loss: 0.03911157234752965
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000
  iterations_since_restore: 473
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,473,5354.14,473000,-0.05,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-09-20_11-17-33
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 476
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9334402402242026
          entropy_coeff: 0.009999999999999998
          kl: 0.013531625992307995
          policy_loss: -0.073523530157076
          total_loss: -0.06399068819979827
          vf_explained_var: 0.10787691175937653
          vf_loss: 0.024752374545722787
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000
  iterations_since_restore: 474
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,474,5365.55,474000,-0.05,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-09-20_11-17-44
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 477
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.6400721311569213
          entropy_coeff: 0.009999999999999998
          kl: 0.017451846004181013
          policy_loss: -0.020514689220322502
          total_loss: -0.006132721114489767
          vf_explained_var: 0.004722881130874157
          vf_loss: 0.025475705004323067
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  iterations_since_restore: 475
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,475,5376.55,475000,-0.07,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-09-20_11-17-55
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 478
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8175673908657497
          entropy_coeff: 0.009999999999999998
          kl: 0.012693872788907731
          policy_loss: 0.0294212330546644
          total_loss: 0.015562412722243204
          vf_explained_var: -0.7038480639457703
          vf_loss: 0.00045673436511100993
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476000
  iterations_since_restore: 476
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,476,5387.39,476000,-0.07,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-09-20_11-18-06
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 479
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8867312762472364
          entropy_coeff: 0.009999999999999998
          kl: 0.009778116240845512
          policy_loss: -0.13101996150281695
          total_loss: -0.1391363185313013
          vf_explained_var: -0.5991606116294861
          vf_loss: 0.007777500227611098
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 477000
  iterations_since_restore: 477
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,477,5397.88,477000,-0.07,0,-3,995.87




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-09-20_11-18-34
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 480
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.926879644393921
          entropy_coeff: 0.009999999999999998
          kl: 0.011020008690977849
          policy_loss: -0.07897354952163166
          total_loss: -0.08980505536827776
          vf_explained_var: 0.3285982310771942
          vf_loss: 0.0050861859053839
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 478000
  iterations_since_restore: 478
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,478,5426.28,478000,-0.07,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-09-20_11-18-46
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 481
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9272206915749444
          entropy_coeff: 0.009999999999999998
          kl: 0.010689074802706502
          policy_loss: -0.04325679308838314
          total_loss: -0.05342018351786666
          vf_explained_var: 0.0029630574863404036
          vf_loss: 0.005858343371397091
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
  iterations_since_restore: 479
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,479,5438.46,479000,-0.07,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-09-20_11-18-57
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 482
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.9559771829181247
          entropy_coeff: 0.009999999999999998
          kl: 0.01421764315326336
          policy_loss: 0.01906017205781407
          total_loss: 0.005599641717142529
          vf_explained_var: -0.25826889276504517
          vf_loss: 0.0017757566448482168
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 480000
  iterations_since_restore: 480
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,480,5448.87,480000,-0.07,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-09-20_11-19-07
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 483
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.879900218380822
          entropy_coeff: 0.009999999999999998
          kl: 0.009883977764742606
          policy_loss: -0.031028990757962067
          total_loss: -0.04637696420152982
          vf_explained_var: -0.9917554259300232
          vf_loss: 0.00044538067013490947
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 481000
  iterations_since_restore: 481
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,481,5459.07,481000,-0.07,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-09-20_11-19-17
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 484
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8901672813627455
          entropy_coeff: 0.009999999999999998
          kl: 0.017494307707347487
          policy_loss: -0.05835503372881148
          total_loss: -0.0711410397870673
          vf_explained_var: -0.4874001443386078
          vf_loss: 0.0007957712339702993
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 482000
  iterations_since_restore: 482
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,482,5469.47,482000,-0.07,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-09-20_11-19-28
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 485
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8681828445858426
          entropy_coeff: 0.009999999999999998
          kl: 0.014797946168073435
          policy_loss: -0.12373300087120798
          total_loss: -0.13467323068115447
          vf_explained_var: -0.15469810366630554
          vf_loss: 0.003241648310278025
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 483000
  iterations_since_restore: 483
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,483,5479.73,483000,-0.07,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-09-20_11-19-38
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 486
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.8292755524317423
          entropy_coeff: 0.009999999999999998
          kl: 0.022403173102126868
          policy_loss: -0.016681531071662904
          total_loss: -0.007618860155344009
          vf_explained_var: 0.265838623046875
          vf_loss: 0.020542778165286615
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484000
  iterations_since_restore: 484
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,484,5489.85,484000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-09-20_11-19-48
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 487
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.776714289188385
          entropy_coeff: 0.009999999999999998
          kl: 0.019036461440395978
          policy_loss: -0.06262420585585965
          total_loss: -0.0657983311969373
          vf_explained_var: 0.08613990247249603
          vf_loss: 0.005909735311676437
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485000
  iterations_since_restore: 485
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,485,5500.14,485000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-09-20_11-19-58
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 488
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.7083157036039565
          entropy_coeff: 0.009999999999999998
          kl: 0.008684083231016004
          policy_loss: -0.08286655309299627
          total_loss: -0.09479970182809565
          vf_explained_var: -0.48722195625305176
          vf_loss: 0.00118885353505094
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486000
  iterations_since_restore: 486
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,486,5510.3,486000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-09-20_11-20-08
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 489
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.6123149832089743
          entropy_coeff: 0.009999999999999998
          kl: 0.013704761404802292
          policy_loss: -0.04439457183082898
          total_loss: -0.043708311021327974
          vf_explained_var: 0.10977283120155334
          vf_loss: 0.010558127047907975
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487000
  iterations_since_restore: 487
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,487,5520.25,487000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-09-20_11-20-19
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 490
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.6516992886861166
          entropy_coeff: 0.009999999999999998
          kl: 0.010809115329630439
          policy_loss: -0.03412971713890632
          total_loss: -0.039895733156137994
          vf_explained_var: 0.31219005584716797
          vf_loss: 0.005820512104805352
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  iterations_since_restore: 488
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,488,5530.54,488000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-09-20_11-20-29
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 491
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.9499860048294066
          entropy_coeff: 0.009999999999999998
          kl: 0.00905911919056847
          policy_loss: -0.12162883256872495
          total_loss: -0.13562895469367503
          vf_explained_var: -0.1183161735534668
          vf_loss: 0.0013675148010305646
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000
  iterations_since_restore: 489
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,489,5541.06,489000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-09-20_11-20-40
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 492
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.9044922206136916
          entropy_coeff: 0.009999999999999998
          kl: 0.011087835734808532
          policy_loss: -0.1629276697834333
          total_loss: -0.1761064424696896
          vf_explained_var: -0.42513301968574524
          vf_loss: 0.0008085491239196725
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  iterations_since_restore: 490
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,490,5551.71,490000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-09-20_11-20-51
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 493
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.770911615424686
          entropy_coeff: 0.009999999999999998
          kl: 0.006662153223543419
          policy_loss: -0.06148558596356048
          total_loss: -0.0759151664459043
          vf_explained_var: -0.8731422424316406
          vf_loss: 0.0002406638817370145
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  iterations_since_restore: 491
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,491,5562.34,491000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-09-20_11-21-01
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 494
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.8601184765497842
          entropy_coeff: 0.009999999999999998
          kl: 0.008431342570036375
          policy_loss: -0.05498699000519183
          total_loss: -0.06935776667669416
          vf_explained_var: -0.9405680894851685
          vf_loss: 0.0003845387190166447
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  iterations_since_restore: 492
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,492,5572.87,492000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-09-20_11-21-12
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 495
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.8140512466430665
          entropy_coeff: 0.009999999999999998
          kl: 0.01738488867083247
          policy_loss: -0.06592611326939529
          total_loss: -0.07242584141592184
          vf_explained_var: -0.3302379548549652
          vf_loss: 0.003710848863960968
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000
  iterations_since_restore: 493
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,493,5583.3,493000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-09-20_11-21-22
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 496
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.8047197765774197
          entropy_coeff: 0.009999999999999998
          kl: 0.009170802913044599
          policy_loss: -0.03386100406448046
          total_loss: -0.04738998711109162
          vf_explained_var: -0.357991486787796
          vf_loss: 0.00033504880864509485
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000
  iterations_since_restore: 494
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,494,5593.94,494000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-09-20_11-21-33
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 497
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.868179319964515
          entropy_coeff: 0.009999999999999998
          kl: 0.007479127019791873
          policy_loss: 0.03138053661419286
          total_loss: 0.016309486495123968
          vf_explained_var: -0.9494247436523438
          vf_loss: 0.00019921819249349128
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 495000
  iterations_since_restore: 495
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,495,5604.46,495000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-09-20_11-21-43
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 498
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.8079118119345772
          entropy_coeff: 0.009999999999999998
          kl: 0.007001318193757693
          policy_loss: -0.07898303154442045
          total_loss: -0.0934077762067318
          vf_explained_var: -0.4961286783218384
          vf_loss: 0.00046079594887689585
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 496000
  iterations_since_restore: 496
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,496,5615.07,496000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-09-20_11-21-54
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 499
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.9201025035646226
          entropy_coeff: 0.009999999999999998
          kl: 0.011658187147000731
          policy_loss: -0.0106372299293677
          total_loss: -0.01816545298529996
          vf_explained_var: -0.09156882017850876
          vf_loss: 0.006355047017374697
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000
  iterations_since_restore: 497
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,497,5625.79,497000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-09-20_11-22-05
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 500
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.8332609335581462
          entropy_coeff: 0.009999999999999998
          kl: 0.009575907903095171
          policy_loss: -0.11628932348555988
          total_loss: -0.1298091856141885
          vf_explained_var: -0.7611931562423706
          vf_loss: 0.0004447956727947005
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 498000
  iterations_since_restore: 498
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,498,5636.42,498000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-09-20_11-22-15
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 501
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.8512151956558227
          entropy_coeff: 0.009999999999999998
          kl: 0.009552477838642047
          policy_loss: -0.03677544680734476
          total_loss: -0.05034913366867436
          vf_explained_var: -0.6352692246437073
          vf_loss: 0.0005812019281115176
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
  iterations_since_restore: 499
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,499,5647.01,499000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-09-20_11-22-26
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 502
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.7264758004082574
          entropy_coeff: 0.009999999999999998
          kl: 0.02012258969850187
          policy_loss: -0.09482422429654333
          total_loss: -0.10200049384600586
          vf_explained_var: -0.5581291913986206
          vf_loss: 0.000909779343378937
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500000
  iterations_since_restore: 500
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,500,5657.52,500000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-09-20_11-22-36
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 503
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.8192353659205966
          entropy_coeff: 0.009999999999999998
          kl: 0.006134625435025853
          policy_loss: -0.09255860325776868
          total_loss: -0.10617799531254503
          vf_explained_var: -0.9726760983467102
          vf_loss: 0.00037559265781763115
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 501000
  iterations_since_restore: 501
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,501,5668.04,501000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-09-20_11-22-47
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 504
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.8117696934276157
          entropy_coeff: 0.009999999999999998
          kl: 0.010214157532939971
          policy_loss: 0.03466015379461977
          total_loss: 0.02418600685066647
          vf_explained_var: -0.47172534465789795
          vf_loss: 0.0006549270629572372
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000
  iterations_since_restore: 502
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,502,5678.65,502000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-09-20_11-22-58
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 505
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.5801924188931784
          entropy_coeff: 0.009999999999999998
          kl: 0.010379114272763208
          policy_loss: -0.05180263924929831
          total_loss: -0.05897722426387999
          vf_explained_var: 0.2985255718231201
          vf_loss: 0.0015258535671617008
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503000
  iterations_since_restore: 503
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,503,5689.38,503000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-09-20_11-23-08
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 506
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.7065417806307475
          entropy_coeff: 0.009999999999999998
          kl: 0.0070601862442848705
          policy_loss: -0.02654289450082514
          total_loss: -0.03789825319415993
          vf_explained_var: -0.30530571937561035
          vf_loss: 0.0008794137554812349
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504000
  iterations_since_restore: 504
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,504,5699.79,504000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-09-20_11-23-19
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 507
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.6394095871183607
          entropy_coeff: 0.009999999999999998
          kl: 0.010539901527460198
          policy_loss: -0.0862783036298222
          total_loss: -0.09482292578452163
          vf_explained_var: -0.8650169968605042
          vf_loss: 0.0006379747346121196
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000
  iterations_since_restore: 505
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,505,5710.32,505000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-09-20_11-23-29
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 508
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.720747062895033
          entropy_coeff: 0.009999999999999998
          kl: 0.011527376869284664
          policy_loss: -0.040904731510414016
          total_loss: -0.049079276704125936
          vf_explained_var: -0.95653235912323
          vf_loss: 0.001145786692440096
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000
  iterations_since_restore: 506
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,506,5720.87,506000,-0.08,0,-3,995.87


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-09-20_11-23-40
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 509
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.6742236720191108
          entropy_coeff: 0.009999999999999998
          kl: 0.006176395936074641
          policy_loss: -0.03536740648042824
          total_loss: -0.04733310401853588
          vf_explained_var: -1.0
          vf_loss: 0.0005505923301421313
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000
  iterations_since_restore: 507
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,507,5731.32,507000,-0.08,0,-3,995.87




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-09-20_11-24-08
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 510
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6842092037200928
          cur_lr: 5.000000000000001e-05
          entropy: 1.328189445866479
          entropy_coeff: 0.009999999999999998
          kl: 0.02070804718045915
          policy_loss: 0.04742543465561337
          total_loss: 0.05247969561152988
          vf_explained_var: 0.39161545038223267
          vf_loss: 0.004167517054722541
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000
  iterations_since_restore: 508
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,508,5759.29,508000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-09-20_11-24-18
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 511
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.6964731666776869
          entropy_coeff: 0.009999999999999998
          kl: 0.00606508455267058
          policy_loss: -0.04874051807241307
          total_loss: -0.05891980874455637
          vf_explained_var: -0.8980711102485657
          vf_loss: 0.0005607606610283256
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000
  iterations_since_restore: 509
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,509,5769.66,509000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-09-20_11-24-28
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 512
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.6665614234076607
          entropy_coeff: 0.009999999999999998
          kl: 0.006379127448137961
          policy_loss: -0.06300055562622017
          total_loss: -0.07238388792094257
          vf_explained_var: -0.39234063029289246
          vf_loss: 0.0007352955083155798
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000
  iterations_since_restore: 510
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,510,5779.68,510000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-09-20_11-24-39
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 513
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.5797932982444762
          entropy_coeff: 0.009999999999999998
          kl: 0.005988790634144673
          policy_loss: -0.09325747638940811
          total_loss: -0.10263515710830688
          vf_explained_var: -0.5312452912330627
          vf_loss: 0.00027387390808952556
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511000
  iterations_since_restore: 511
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,511,5790.19,511000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-09-20_11-24-49
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 514
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0263138055801395
          cur_lr: 5.000000000000001e-05
          entropy: 1.398382908768124
          entropy_coeff: 0.009999999999999998
          kl: 0.004601291065526346
          policy_loss: -0.009514384385612275
          total_loss: -0.0183039209081067
          vf_explained_var: 0.09033461660146713
          vf_loss: 0.00047192599910583034
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
  iterations_since_restore: 512
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,512,5800.51,512000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-09-20_11-25-00
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 515
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.5777772770987617
          entropy_coeff: 0.009999999999999998
          kl: 0.005348338955796173
          policy_loss: -0.053007147047254774
          total_loss: -0.06574022927218014
          vf_explained_var: -0.7251086831092834
          vf_loss: 0.00030015246698490553
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000
  iterations_since_restore: 513
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,513,5810.88,513000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-09-20_11-25-10
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 516
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.1918661402331459
          entropy_coeff: 0.009999999999999998
          kl: 0.010963322369265071
          policy_loss: -0.0115105958448516
          total_loss: -0.0171366466416253
          vf_explained_var: -0.43163490295410156
          vf_loss: 0.0006667075538037655
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 514000
  iterations_since_restore: 514
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,514,5821.28,514000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-09-20_11-25-21
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 517
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.5328144484096102
          entropy_coeff: 0.009999999999999998
          kl: 0.01142768529024857
          policy_loss: 0.022441914884580506
          total_loss: 0.013387111864156193
          vf_explained_var: -1.0
          vf_loss: 0.0004091451963176951
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515000
  iterations_since_restore: 515
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,515,5831.91,515000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-09-20_11-25-31
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 518
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.628897319899665
          entropy_coeff: 0.009999999999999998
          kl: 0.006806533979984092
          policy_loss: -0.022978492329518
          total_loss: -0.03551363905684816
          vf_explained_var: -0.5173905491828918
          vf_loss: 0.00026100617922363906
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 516000
  iterations_since_restore: 516
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,516,5842.59,516000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-09-20_11-25-42
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 519
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.7330945465299818
          entropy_coeff: 0.009999999999999998
          kl: 0.009975629475637978
          policy_loss: 0.028659396701388888
          total_loss: 0.016641395828790134
          vf_explained_var: -0.8969638347625732
          vf_loss: 0.00019388094515306875
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000
  iterations_since_restore: 517
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,517,5853.2,517000,-0.08,0,-3,994.5


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-09-20_11-25-53
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 520
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.4553927567270066
          entropy_coeff: 0.009999999999999998
          kl: 0.006881877195698858
          policy_loss: -0.005444410070776939
          total_loss: -0.015463084272212452
          vf_explained_var: -0.45658746361732483
          vf_loss: 0.0010037718892918848
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000
  iterations_since_restore: 518
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,518,5864.02,518000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-09-20_11-26-04
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 521
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.441567956076728
          entropy_coeff: 0.009999999999999998
          kl: 0.02392311381209882
          policy_loss: 0.10582440942525864
          total_loss: 0.10749102847443687
          vf_explained_var: 0.14206422865390778
          vf_loss: 0.0038059894569919886
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000
  iterations_since_restore: 519
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,519,5875.04,519000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-09-20_11-26-15
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 522
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7697353541851042
          cur_lr: 5.000000000000001e-05
          entropy: 1.2415709230634902
          entropy_coeff: 0.009999999999999998
          kl: 0.006488534795576949
          policy_loss: -0.008060435702403387
          total_loss: -0.015019167773425579
          vf_explained_var: -0.5238737463951111
          vf_loss: 0.00046252283063950015
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000
  iterations_since_restore: 520
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,520,5886.07,520000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-09-20_11-26-25
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 523
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7697353541851042
          cur_lr: 5.000000000000001e-05
          entropy: 2.1444126685460407
          entropy_coeff: 0.009999999999999998
          kl: 0.009171064179721987
          policy_loss: -0.09583520938952764
          total_loss: -0.10921166075600518
          vf_explained_var: 0.15495982766151428
          vf_loss: 0.0010083818714418965
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 521000
  iterations_since_restore: 521
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,521,5896.38,521000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-09-20_11-26-36
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 524
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7697353541851042
          cur_lr: 5.000000000000001e-05
          entropy: 2.0080921954578823
          entropy_coeff: 0.009999999999999998
          kl: 0.00560452430092408
          policy_loss: 0.014195974792043368
          total_loss: -0.0012599003397756152
          vf_explained_var: -0.8451082110404968
          vf_loss: 0.00031104587639371555
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000
  iterations_since_restore: 522
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,522,5907.21,522000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-09-20_11-26-47
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 525
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7697353541851042
          cur_lr: 5.000000000000001e-05
          entropy: 1.0467291169696384
          entropy_coeff: 0.009999999999999998
          kl: 0.003772389808384317
          policy_loss: 0.02110138883193334
          total_loss: 0.013700423017144202
          vf_explained_var: -0.8996247053146362
          vf_loss: 0.00016258378600468858
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 523000
  iterations_since_restore: 523
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,523,5918.35,523000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-09-20_11-26-58
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 526
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 2.258230945799086
          entropy_coeff: 0.009999999999999998
          kl: 0.017465697522582285
          policy_loss: -0.100302388270696
          total_loss: -0.11553467479017046
          vf_explained_var: 0.1635987013578415
          vf_loss: 0.0006280397742456342
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524000
  iterations_since_restore: 524
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,524,5928.98,524000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-09-20_11-27-09
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 527
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.636757939391666
          entropy_coeff: 0.009999999999999998
          kl: 0.01579143164753061
          policy_loss: -0.07651057152284516
          total_loss: -0.08459120003713502
          vf_explained_var: -0.09005361050367355
          vf_loss: 0.002209340505457173
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
  iterations_since_restore: 525
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,525,5939.65,525000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-09-20_11-27-20
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 528
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.5339202576213413
          entropy_coeff: 0.009999999999999998
          kl: 0.018000248004423014
          policy_loss: -0.08972483020689752
          total_loss: -0.09665978004535039
          vf_explained_var: 0.04083935543894768
          vf_loss: 0.0014765404394387993
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000
  iterations_since_restore: 526
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,526,5950.61,526000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-09-20_11-27-30
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 529
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.4745606554879083
          entropy_coeff: 0.009999999999999998
          kl: 0.015339761293147644
          policy_loss: -0.05443092265890704
          total_loss: -0.06206417063044177
          vf_explained_var: -0.2123265117406845
          vf_loss: 0.0012085825521757619
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000
  iterations_since_restore: 527
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,527,5961.22,527000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-09-20_11-27-41
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 530
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.5981004556020102
          entropy_coeff: 0.009999999999999998
          kl: 0.009952463145038198
          policy_loss: 0.00018081259396341112
          total_loss: -0.0108655855887466
          vf_explained_var: -0.32038185000419617
          vf_loss: 0.001104223489528522
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000
  iterations_since_restore: 528
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,528,5972,528000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-09-20_11-27-52
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 531
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.3423578103383382
          entropy_coeff: 0.009999999999999998
          kl: 0.01770146994912939
          policy_loss: -0.03024194824198882
          total_loss: -0.03605713757375876
          vf_explained_var: -0.11908955127000809
          vf_loss: 0.0007956639342915474
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 529000
  iterations_since_restore: 529
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,529,5982.78,529000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-09-20_11-28-03
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 532
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.3353896366225348
          entropy_coeff: 0.009999999999999998
          kl: 0.007249203879901166
          policy_loss: -0.076660807368656
          total_loss: -0.08684381078928709
          vf_explained_var: -0.8178730607032776
          vf_loss: 0.00038090934774825453
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000
  iterations_since_restore: 530
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,530,5993.75,530000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-09-20_11-28-14
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 533
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.0717774000432756
          entropy_coeff: 0.009999999999999998
          kl: 0.013379570829434565
          policy_loss: -0.1065781257632706
          total_loss: -0.11131133023235533
          vf_explained_var: -0.6532154679298401
          vf_loss: 0.0008352039625202047
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000
  iterations_since_restore: 531
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,531,6004.63,531000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-09-20_11-28-25
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 534
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.55059638288286
          entropy_coeff: 0.009999999999999998
          kl: 0.01254039647907411
          policy_loss: -0.08512485290153159
          total_loss: -0.09424716946151522
          vf_explained_var: -0.4438135027885437
          vf_loss: 0.0015572522139538907
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
  iterations_since_restore: 532
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,532,6015.42,532000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-09-20_11-28-35
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 535
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.3561682899792988
          entropy_coeff: 0.009999999999999998
          kl: 0.00790134076819804
          policy_loss: -0.03384814324478309
          total_loss: -0.04415994178917673
          vf_explained_var: -0.6531915664672852
          vf_loss: 0.00020891412625335053
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
  iterations_since_restore: 533
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,533,6026.32,533000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-09-20_11-28-47
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 536
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 2.1525895807478164
          entropy_coeff: 0.009999999999999998
          kl: 0.013862967629126387
          policy_loss: 0.04672346959511439
          total_loss: 0.031234707683324814
          vf_explained_var: -0.2583144009113312
          vf_loss: 0.0007017251558459571
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  iterations_since_restore: 534
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,534,6037.65,534000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-09-20_11-28-57
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 537
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.3883166948954264
          entropy_coeff: 0.009999999999999998
          kl: 0.009581521111413282
          policy_loss: -0.20479539293381904
          total_loss: -0.21468784858783085
          vf_explained_var: -0.4002222716808319
          vf_loss: 0.0003030939081024068
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  iterations_since_restore: 535
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,535,6047.72,535000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-09-20_11-29-06
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 538
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8852246562639872
          entropy_coeff: 0.009999999999999998
          kl: 0.01186075918036305
          policy_loss: -0.003050965360469288
          total_loss: -0.01629844597644276
          vf_explained_var: 0.5934047698974609
          vf_loss: 0.0010399439588784137
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000
  iterations_since_restore: 536
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,536,6057.24,536000,-0.08,0,-3,995.88


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-09-20_11-29-17
  done: false
  episode_len_mean: 995.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 539
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.9432046267721388
          entropy_coeff: 0.009999999999999998
          kl: 0.015334062805443851
          policy_loss: -0.014650430240564876
          total_loss: -0.027267475343412822
          vf_explained_var: -0.002612428506836295
          vf_loss: 0.0009134139083067163
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 537000
  iterations_since_restore: 537
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,537,6067.66,537000,-0.08,0,-3,995.88




Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-09-20_11-29-46
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 540
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8117682602670457
          entropy_coeff: 0.009999999999999998
          kl: 0.016696501953821594
          policy_loss: -0.12942402909199396
          total_loss: -0.13890847778982587
          vf_explained_var: 0.1763351410627365
          vf_loss: 0.002207291848672968
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 538000
  iterations_since_restore: 538
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,538,6096.47,538000,-0.08,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-09-20_11-29-56
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 541
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7121215674612258
          entropy_coeff: 0.009999999999999998
          kl: 0.01594240503812981
          policy_loss: 0.09455647990107537
          total_loss: 0.08419500736312734
          vf_explained_var: -0.090845987200737
          vf_loss: 0.0006240287202268115
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 539000
  iterations_since_restore: 539
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,539,6107.07,539000,-0.08,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-09-20_11-30-07
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 542
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.70541553762224
          entropy_coeff: 0.009999999999999998
          kl: 0.01719036450364027
          policy_loss: -0.05289343570669492
          total_loss: -0.06265258313053185
          vf_explained_var: -0.13467468321323395
          vf_loss: 0.0006789936207092575
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 540000
  iterations_since_restore: 540
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,540,6117.77,540000,-0.08,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-09-20_11-30-18
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 543
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.816832889450921
          entropy_coeff: 0.009999999999999998
          kl: 0.012853045966572947
          policy_loss: -0.03327410477730963
          total_loss: -0.04567488647169537
          vf_explained_var: -0.5312064290046692
          vf_loss: 0.0008208250762739529
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541000
  iterations_since_restore: 541
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,541,6128.25,541000,-0.08,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-09-20_11-30-28
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 544
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.9807796968354119
          entropy_coeff: 0.009999999999999998
          kl: 0.018594786426491854
          policy_loss: -0.04231199448307355
          total_loss: -0.054382823490434225
          vf_explained_var: -0.0824044719338417
          vf_loss: 0.0005804346437798813
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000
  iterations_since_restore: 542
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,542,6138.79,542000,-0.08,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-09-20_11-30-39
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 545
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7523143543137445
          entropy_coeff: 0.009999999999999998
          kl: 0.012417981819279679
          policy_loss: -0.06603674126995934
          total_loss: -0.07839805380337768
          vf_explained_var: -0.85426265001297
          vf_loss: 0.00038254941812030865
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 543000
  iterations_since_restore: 543
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,543,6150.05,543000,-0.08,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-09-20_11-30-51
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 546
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7095468468136257
          entropy_coeff: 0.009999999999999998
          kl: 0.01041904969195771
          policy_loss: 0.053435915377404955
          total_loss: 0.0407144460413191
          vf_explained_var: -0.892215371131897
          vf_loss: 0.00036404217567501796
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000
  iterations_since_restore: 544
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,544,6161.13,544000,-0.08,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-09-20_11-31-02
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 547
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8644013656510248
          entropy_coeff: 0.009999999999999998
          kl: 0.014029563247372343
          policy_loss: -0.059861661700738804
          total_loss: -0.07238709090484513
          vf_explained_var: -0.5994453430175781
          vf_loss: 0.0007190589919143046
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000
  iterations_since_restore: 545
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,545,6172.67,545000,-0.08,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-09-20_11-31-13
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 548
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 2.001439356803894
          entropy_coeff: 0.009999999999999998
          kl: 0.012867014605734677
          policy_loss: -0.06792082277437052
          total_loss: -0.08242847683529059
          vf_explained_var: -0.8493210077285767
          vf_loss: 0.0005546420063991617
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 546000
  iterations_since_restore: 546
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,546,6183.9,546000,-0.07,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-09-20_11-31-25
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 549
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 2.0689183712005614
          entropy_coeff: 0.009999999999999998
          kl: 0.013996529521300694
          policy_loss: -0.09842942961388164
          total_loss: -0.11325607423981031
          vf_explained_var: -1.0
          vf_loss: 0.00047572444390324463
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000
  iterations_since_restore: 547
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,547,6195.05,547000,-0.07,0,-3,994.36


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-09-20_11-31-36
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 550
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7245934353934393
          entropy_coeff: 0.009999999999999998
          kl: 0.008449295139585308
          policy_loss: -0.04700585572669903
          total_loss: -0.06078195008966658
          vf_explained_var: -1.0
          vf_loss: 0.00021797882412405062
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 548000
  iterations_since_restore: 548
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,548,6206.24,548000,-0.07,0,-3,995.74


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-09-20_11-31-47
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.07
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 551
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.6169497556156582
          entropy_coeff: 0.009999999999999998
          kl: 0.020367810172768695
          policy_loss: 0.06054703820910719
          total_loss: 0.05323007247514195
          vf_explained_var: -0.5220958590507507
          vf_loss: 0.0010136188509932253
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 549000
  iterations_since_restore: 549
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,549,6217.48,549000,-0.07,0,-3,995.74


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-09-20_11-31-58
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 552
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.7242072529262966
          entropy_coeff: 0.009999999999999998
          kl: 0.014292497219541716
          policy_loss: -0.020090293553140428
          total_loss: -0.01824284733997451
          vf_explained_var: -0.377866268157959
          vf_loss: 0.010838438687561494
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 550000
  iterations_since_restore: 550
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,550,6228.15,550000,-0.08,0,-3,995.74


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-09-20_11-32-09
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 553
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.7081737160682677
          entropy_coeff: 0.009999999999999998
          kl: 0.007168301515776084
          policy_loss: -0.011031638541155391
          total_loss: -0.023730139765474532
          vf_explained_var: -1.0
          vf_loss: 0.0002449624819241257
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 551000
  iterations_since_restore: 551
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,551,6239.3,551000,-0.08,0,-3,995.74


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-09-20_11-32-20
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 554
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.8400734146436055
          entropy_coeff: 0.009999999999999998
          kl: 0.005452265554947012
          policy_loss: 0.01879281848669052
          total_loss: 0.006752095537053214
          vf_explained_var: -0.4926300048828125
          vf_loss: 0.003212409313305721
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552000
  iterations_since_restore: 552
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,552,6250.67,552000,-0.08,0,-3,995.74


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-09-20_11-32-31
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 555
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.4225848568810358
          entropy_coeff: 0.009999999999999998
          kl: 0.018803135346409483
          policy_loss: -0.0970512545771069
          total_loss: -0.09857669414745437
          vf_explained_var: -0.11555467545986176
          vf_loss: 0.0018453323802936615
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553000
  iterations_since_restore: 553
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,553,6260.98,553000,-0.08,0,-3,995.74


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-09-20_11-32-42
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 556
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.8564880079693264
          entropy_coeff: 0.009999999999999998
          kl: 0.0091101337603472
          policy_loss: -0.004475519350833363
          total_loss: -0.01757411157919301
          vf_explained_var: -1.0
          vf_loss: 0.0002069935872795112
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000
  iterations_since_restore: 554
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,554,6272.11,554000,-0.08,0,-3,995.74


Result for PPO_my_env_cede2_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-09-20_11-32-53
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 557
  experiment_id: 76801b529e5a458f812cf4d6323518b3
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.871549579832289
          entropy_coeff: 0.009999999999999998
          kl: 0.007472600026034115
          policy_loss: -0.032107516336772175
          total_loss: -0.04619272156722016
          vf_explained_var: -1.0
          vf_loss: 0.00031634633439049745
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 555000
  iterations_since_restore: 555
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_cede2_00000,RUNNING,192.168.1.100:526121,555,6283.16,555000,-0.08,0,-3,995.74
