In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional feature extractor that returns a flattened feature vector.

    The network is five unpadded ``Conv2d`` + ``ReLU`` stages
    (3 -> 32 -> 64 -> 64 -> 64 -> 64 channels) followed by ``nn.Flatten``.
    The stack lives under the attribute ``cnn`` so that ``state_dict`` keys
    keep the ``cnn.<index>.*`` layout.
    """

    def __init__(self):
        super().__init__()

        # One row per conv stage: (in_channels, out_channels, kernel_size, stride).
        conv_specs = (
            (3, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
            (64, 64, 3, 1),
            (64, 64, 2, 1),
        )

        stages = []
        for in_ch, out_ch, kernel, stride in conv_specs:
            stages.append(
                nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride, padding=0)
            )
            stages.append(nn.ReLU())
        stages.append(nn.Flatten())

        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the conv stack to ``x`` and return the flattened activations."""
        return self.cnn(x)

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib custom Torch model: pretrained ``VisualEncoder`` plus linear heads.

    The encoder output (``features_dim`` = 64 values per sample) feeds two
    linear layers: ``action_head`` producing ``action_space.n`` logits and
    ``value_head`` producing one state-value estimate.

    Args:
        obs_space, action_space, num_outputs, model_config, name: forwarded
            unchanged to ``TorchModelV2.__init__``. ``action_space`` must
            expose ``.n`` (it sizes the action head).
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        # Width of the flattened encoder output consumed by both heads.
        features_dim = 64
        self.encoder = VisualEncoder()
        # Load the pretrained weights onto the CPU first; the modules are moved
        # to the GPU below when one is available.
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AlinaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Value estimate cached by forward() and returned by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: mapping whose ``'obs'`` entry is a 4-D tensor;
                presumably (batch, height, width, channels) with pixel values
                in 0..255 -- TODO confirm against the env wrappers.
            state: passed through unchanged.
            seq_lens: unused.

        Returns:
            ``(logits, state)``.
        """
        # Tensor.cuda() returns a copy rather than moving the tensor in place,
        # so the observation has to be re-bound. Use the device the parameters
        # actually live on, so this also works if the model has been moved
        # after construction.
        device = next(self.parameters()).device
        obs = input_dict['obs'].to(device)
        # Move the last axis to position 1 (channels-first for Conv2d) and scale.
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        features = self.encoder(obs)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the most recent ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register the model class under the key that the trainer config's
# "custom_model" entry refers to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os

# Order CUDA devices by PCI bus ID (see issue #152) and expose only device 0.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

def env_creator(env_config):
    """Build the wrapped IGLU environment for RLlib.

    Args:
        env_config: dict-like settings supplied by the trainer's
            ``"env_config"`` entry; may be empty or ``None``. Optional keys:

            * ``"max_steps"`` -- episode step limit passed to ``gym.make``
              (default ``1000``).
            * ``"task_preset"`` -- iterable of task ids given to ``TaskSet``
              (default ``['C17']``).

    Returns:
        The ``IGLUSilentBuilder-v0`` environment wrapped first in
        ``PovOnlyWrapper`` and then in ``IgluActionWrapper``.
    """
    # Defaults match the previously hard-coded values, so an empty config
    # builds exactly the same environment as before.
    settings = env_config or {}
    max_steps = settings.get("max_steps", 1000)
    task_preset = list(settings.get("task_preset", ['C17']))

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = IgluActionWrapper(env)
    return env

from ray.tune.registry import register_env
# Make env_creator available to RLlib under the name used for "env" in the
# trainer config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# PPO settings handed to Tune; kept in a named dict so the launch call stays short.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": {
        # Specify our custom model from above.
        "custom_model": "my_torch_model",
        # Extra kwargs to be passed to your model's c'tor.
        "custom_model_config": {},
    },
    # Project and run name for the Weights & Biases logger.
    "logger_config": {
        "wandb": {
            "project": "IGLU-Minecraft",
            "name": "PPO C17 pretrained (AlinaCNN)",
        },
    },
}

tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])

2021-09-20 08:00:03,145	INFO wandb.py:170 -- Already logged into W&B.
2021-09-20 08:00:03,164	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_c2935_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)


[2m[36m(pid=491044)[0m 2021-09-20 08:00:07,163	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=491044)[0m 2021-09-20 08:00:07,163	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-09-20_08-01-04
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 0.022018798134781213
          entropy_coeff: 0.009999999999999998
          kl: 0.0034394232049184477
          policy_loss: 0.1774702523317602
          total_loss: 3.7794544246461657
          vf_explained_var: -0.17326650023460388
          vf_loss: 3.601516452100542
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.1.100
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,1,50.5847,1000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-09-20_08-01-14
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 0.31149898279044363
          entropy_coeff: 0.009999999999999998
          kl: 0.06106668933769445
          policy_loss: 0.1635347416003545
          total_loss: 2.9910327434539794
          vf_explained_var: -0.10677826404571533
          vf_loss: 2.824506273534563
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,2,61.0665,2000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-09-20_08-01-25
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 0.8175328115622202
          entropy_coeff: 0.009999999999999998
          kl: 0.07668064301072693
          policy_loss: 0.1629496521419949
          total_loss: 2.154094625843896
          vf_explained_var: -0.09021443128585815
          vf_loss: 1.9878181808524662
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 192.168.1.100
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,3,71.2504,3000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-09-20_08-01-35
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 4
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.0083332816759745
          entropy_coeff: 0.009999999999999998
          kl: 0.013710865566527512
          policy_loss: 0.17209153390593
          total_loss: 1.6450878812207117
          vf_explained_var: -0.09566884487867355
          vf_loss: 1.47999473909537
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.1.100
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,4,81.3097,4000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-09-20_08-01-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 5
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1137384838528104
          entropy_coeff: 0.009999999999999998
          kl: 0.021606520524539975
          policy_loss: 0.17307312968704436
          total_loss: 1.2429332335789998
          vf_explained_var: -0.02948453091084957
          vf_loss: 1.0761360036002265
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 192.168.1.100
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,5,91.3258,5000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-09-20_08-01-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2394364886813694
          entropy_coeff: 0.009999999999999998
          kl: 0.013457797090596952
          policy_loss: 0.17195199694898394
          total_loss: 0.9491680155197779
          vf_explained_var: -0.016296137124300003
          vf_loss: 0.7850683700707224
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip: 192.168.1.100
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,6,101.388,6000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-09-20_08-02-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 7
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2699478957388135
          entropy_coeff: 0.009999999999999998
          kl: 0.011165038388378118
          policy_loss: 0.17652304122845333
          total_loss: 0.7251684010028839
          vf_explained_var: -0.015548793599009514
          vf_loss: 0.5575766452484661
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_ip: 192.168.1.100
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,7,111.422,7000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-09-20_08-02-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 8
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2780763705571492
          entropy_coeff: 0.009999999999999998
          kl: 0.013563860625639555
          policy_loss: 0.1674688673681683
          total_loss: 0.5912710481219822
          vf_explained_var: -0.17794854938983917
          vf_loss: 0.4320051388608085
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,8,121.437,8000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-09-20_08-02-25
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 9
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.0563972201612262
          entropy_coeff: 0.009999999999999998
          kl: 0.007316494324094799
          policy_loss: 0.15939194998807377
          total_loss: 0.45961321973138386
          vf_explained_var: -0.05796094611287117
          vf_loss: 0.30831592025028337
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip: 192.168.1.100
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,9,131.504,9000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-09-20_08-02-35
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 10
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2102949804729886
          entropy_coeff: 0.009999999999999998
          kl: 0.0022786411755371895
          policy_loss: 0.15560743146472508
          total_loss: 0.3748005219631725
          vf_explained_var: -0.14641399681568146
          vf_loss: 0.23052700319223934
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,10,141.505,10000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-09-20_08-02-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 11
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.1389986197153728
          entropy_coeff: 0.009999999999999998
          kl: 0.008861119162583364
          policy_loss: 0.15565405189990997
          total_loss: 0.31676055673095915
          vf_explained_var: -0.4349863827228546
          vf_loss: 0.17100118117200003
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,11,151.542,11000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-09-20_08-02-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 12
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.1988748490810395
          entropy_coeff: 0.009999999999999998
          kl: 0.021481492900117912
          policy_loss: 0.1486877203815513
          total_loss: 0.25909436262316177
          vf_explained_var: -0.2776701748371124
          vf_loss: 0.11877038938303788
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,12,161.575,12000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-09-20_08-03-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 13
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4747856338818868
          entropy_coeff: 0.009999999999999998
          kl: 0.01153716016885293
          policy_loss: 0.14169710493750043
          total_loss: 0.22117765761084027
          vf_explained_var: -0.9870458245277405
          vf_loss: 0.09130806403441562
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,13,171.599,13000,0,0,0,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-09-20_08-03-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2857142857142857
  episode_reward_min: -4.0
  episodes_this_iter: 1
  episodes_total: 14
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3392078505622016
          entropy_coeff: 0.009999999999999998
          kl: 0.0031394556067568975
          policy_loss: 0.05454140686326557
          total_loss: 0.13318162982662518
          vf_explained_var: -0.4369364380836487
          vf_loss: 0.09123762659728527
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,14,181.914,14000,-0.285714,0,-4,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-09-20_08-03-25
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.26666666666666666
  episode_reward_min: -4.0
  episodes_this_iter: 1
  episodes_total: 15
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.684667542245653
          entropy_coeff: 0.009999999999999998
          kl: 0.00625263448764914
          policy_loss: 0.14114322737894125
          total_loss: 0.18808545615110134
          vf_explained_var: -0.8617891669273376
          vf_loss: 0.06299755389077796
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,15,191.916,15000,-0.266667,0,-4,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-09-20_08-03-35
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3125
  episode_reward_min: -4.0
  episodes_this_iter: 1
  episodes_total: 16
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.747495170434316
          entropy_coeff: 0.009999999999999998
          kl: 0.008934066337062556
          policy_loss: 0.18364473092887137
          total_loss: 0.23413848554094632
          vf_explained_var: -0.20700956881046295
          vf_loss: 0.0668379854824808
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip: 192.168.1.100
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,16,201.928,16000,-0.3125,0,-4,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-09-20_08-03-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7058823529411765
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 17
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.597353794839647
          entropy_coeff: 0.009999999999999998
          kl: 0.004588904007252747
          policy_loss: 0.14699883957703908
          total_loss: 0.39683054586251576
          vf_explained_var: -0.17212046682834625
          vf_loss: 0.2652244619197316
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore: 17
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,17,212.201,17000,-0.705882,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-09-20_08-03-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7777777777777778
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 18
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06328125
          cur_lr: 5.000000000000001e-05
          entropy: 1.898730927043491
          entropy_coeff: 0.009999999999999998
          kl: 0.0053305900465183634
          policy_loss: 0.16403131554317144
          total_loss: 0.1954755205454098
          vf_explained_var: -0.5987529158592224
          vf_loss: 0.05009418798403607
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,18,222.37,18000,-0.777778,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-09-20_08-04-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 19
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06328125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7745839383867053
          entropy_coeff: 0.009999999999999998
          kl: 0.00522929099883659
          policy_loss: 0.07279962305393484
          total_loss: 0.1361388617919551
          vf_explained_var: 0.05862506106495857
          vf_loss: 0.08075415893561311
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  node_ip: 192.168.1.100
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,19,232.524,19000,-1,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-09-20_08-04-16
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.95
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 20
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06328125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1947498083114625
          entropy_coeff: 0.009999999999999998
          kl: 0.005469076925727853
          policy_loss: 0.13133073101441065
          total_loss: 0.13953162357211113
          vf_explained_var: -0.03940334543585777
          vf_loss: 0.02980230421655708
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,20,242.559,20000,-0.95,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-09-20_08-04-26
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.9047619047619048
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 21
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06328125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5823529813024733
          entropy_coeff: 0.009999999999999998
          kl: 0.0114372518874762
          policy_loss: 0.2059796596566836
          total_loss: 0.25784134194254876
          vf_explained_var: 0.21563485264778137
          vf_loss: 0.06696144657002555
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,21,252.572,21000,-0.904762,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-09-20_08-04-36
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.9090909090909091
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 22
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06328125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6852154824468824
          entropy_coeff: 0.009999999999999998
          kl: 0.008009050204750956
          policy_loss: -0.027700532641675737
          total_loss: 0.02654635202553537
          vf_explained_var: 0.23346683382987976
          vf_loss: 0.07059221872025066
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,22,262.791,22000,-0.909091,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-09-20_08-04-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8695652173913043
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06328125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1461122737990483
          entropy_coeff: 0.009999999999999998
          kl: 0.0055941965921537905
          policy_loss: 0.09923322192496724
          total_loss: 0.11322268255882793
          vf_explained_var: 0.0606837160885334
          vf_loss: 0.03509657559916377
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,23,272.82,23000,-0.869565,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-09-20_08-04-57
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8333333333333334
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 24
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06328125
          cur_lr: 5.000000000000001e-05
          entropy: 2.2864906655417547
          entropy_coeff: 0.009999999999999998
          kl: 0.005853268741449942
          policy_loss: 0.14158919639885426
          total_loss: 0.13272004851864444
          vf_explained_var: -0.3802328407764435
          vf_loss: 0.013625355044172869
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,24,282.77,24000,-0.833333,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-09-20_08-05-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 25
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06328125
          cur_lr: 5.000000000000001e-05
          entropy: 2.2630078342225817
          entropy_coeff: 0.009999999999999998
          kl: 0.0049790473694316046
          policy_loss: 0.22371155354711744
          total_loss: 0.22317075381676357
          vf_explained_var: -0.13977614045143127
          vf_loss: 0.021774196417795287
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,25,292.854,25000,-0.8,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-09-20_08-05-17
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7692307692307693
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 26
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.031640625
          cur_lr: 5.000000000000001e-05
          entropy: 2.410488504833645
          entropy_coeff: 0.009999999999999998
          kl: 0.0043584785499291455
          policy_loss: 0.1916356878148185
          total_loss: 0.17704727119869657
          vf_explained_var: -0.010133414529263973
          vf_loss: 0.009378563017687865
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,26,302.808,26000,-0.769231,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-09-20_08-05-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7407407407407407
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 27
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 2.389121323161655
          entropy_coeff: 0.009999999999999998
          kl: 0.004506429238428369
          policy_loss: 0.02354866506324874
          total_loss: 0.02130779309405221
          vf_explained_var: -0.34199732542037964
          vf_loss: 0.02157904789265659
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,27,312.813,27000,-0.740741,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-09-20_08-05-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.75
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00791015625
          cur_lr: 5.000000000000001e-05
          entropy: 2.4477743016348943
          entropy_coeff: 0.009999999999999998
          kl: 0.005096143594975228
          policy_loss: 0.134293665488561
          total_loss: 0.14215705030494266
          vf_explained_var: 0.13454528152942657
          vf_loss: 0.03230081972562605
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,28,322.815,28000,-0.75,0,-7,1000


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-09-20_08-05-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7241379310344828
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 29
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00791015625
          cur_lr: 5.000000000000001e-05
          entropy: 2.4772468275494046
          entropy_coeff: 0.009999999999999998
          kl: 0.004933931139211486
          policy_loss: 0.046571990392274325
          total_loss: 0.03328599420686563
          vf_explained_var: -0.045847538858652115
          vf_loss: 0.011447444785800245
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,29,332.669,29000,-0.724138,0,-7,1000




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-09-20_08-06-15
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7666666666666667
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 30
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003955078125
          cur_lr: 5.000000000000001e-05
          entropy: 2.2495517280366686
          entropy_coeff: 0.009999999999999998
          kl: 0.004462200511805718
          policy_loss: 0.053642657813098696
          total_loss: 0.10693179807729192
          vf_explained_var: 0.2112397700548172
          vf_loss: 0.0757670065595044
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,30,360.67,30000,-0.766667,0,-7,996.1


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-09-20_08-06-27
  done: false
  episode_len_mean: 996.2258064516129
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8064516129032258
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 31
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019775390625
          cur_lr: 5.000000000000001e-05
          entropy: 2.43305881023407
          entropy_coeff: 0.009999999999999998
          kl: 0.00321282089867903
          policy_loss: 0.005224992914332284
          total_loss: 0.009745739524563153
          vf_explained_var: -0.18894201517105103
          vf_loss: 0.02884498245289756
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,31,373.421,31000,-0.806452,0,-7,996.226


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-09-20_08-06-38
  done: false
  episode_len_mean: 996.34375
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8125
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 32
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00098876953125
          cur_lr: 5.000000000000001e-05
          entropy: 2.4541259659661185
          entropy_coeff: 0.009999999999999998
          kl: 0.004801641937460113
          policy_loss: 0.025960430171754624
          total_loss: 0.0562961146235466
          vf_explained_var: -0.16461525857448578
          vf_loss: 0.054872197119726074
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,32,384.045,32000,-0.8125,0,-7,996.344


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-09-20_08-06-48
  done: false
  episode_len_mean: 996.4545454545455
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7878787878787878
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 33
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.000494384765625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5942294041315717
          entropy_coeff: 0.009999999999999998
          kl: 0.0033577603559753182
          policy_loss: 0.07259517659743626
          total_loss: 0.05065483682685428
          vf_explained_var: -0.8574663996696472
          vf_loss: 0.004000297442285551
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,33,394.348,33000,-0.787879,0,-7,996.455


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-09-20_08-06-59
  done: false
  episode_len_mean: 996.5588235294117
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7941176470588235
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 34
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002471923828125
          cur_lr: 5.000000000000001e-05
          entropy: 2.5945683929655288
          entropy_coeff: 0.009999999999999998
          kl: 0.003944450102400212
          policy_loss: 0.0626499234802193
          total_loss: 0.057620112349589664
          vf_explained_var: 0.1980086863040924
          vf_loss: 0.020914899909661874
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,34,404.734,34000,-0.794118,0,-7,996.559


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-09-20_08-07-09
  done: false
  episode_len_mean: 996.6571428571428
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7714285714285715
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 35
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00012359619140625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6227351241641577
          entropy_coeff: 0.009999999999999998
          kl: 0.003386319579118712
          policy_loss: 0.06348902814918095
          total_loss: 0.05361422664589352
          vf_explained_var: -0.1789819896221161
          vf_loss: 0.016352130687381658
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,35,415.232,35000,-0.771429,0,-7,996.657


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-09-20_08-07-20
  done: false
  episode_len_mean: 996.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.75
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 36
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.1798095703125e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.6163765589396157
          entropy_coeff: 0.009999999999999998
          kl: 0.0030236638947436717
          policy_loss: 0.021101816950572862
          total_loss: 0.008784584080179533
          vf_explained_var: -0.2404847890138626
          vf_loss: 0.013846346327207154
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,36,425.576,36000,-0.75,0,-7,996.75


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-09-20_08-07-30
  done: false
  episode_len_mean: 996.8378378378378
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7297297297297297
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 37
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.08990478515625e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.6074061022864448
          entropy_coeff: 0.009999999999999998
          kl: 0.0036677406278750367
          policy_loss: -0.04430919893913799
          total_loss: -0.04836702677938673
          vf_explained_var: -0.5797099471092224
          vf_loss: 0.022016121302213934
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,37,436.025,37000,-0.72973,0,-7,996.838


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-09-20_08-07-41
  done: false
  episode_len_mean: 996.921052631579
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7105263157894737
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 38
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.544952392578125e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.599455073144701
          entropy_coeff: 0.009999999999999998
          kl: 0.0026598222771177897
          policy_loss: 0.10083367923895518
          total_loss: 0.09142110173900922
          vf_explained_var: -0.7548536658287048
          vf_loss: 0.016581935016438364
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,38,446.439,38000,-0.710526,0,-7,996.921


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-09-20_08-07-51
  done: false
  episode_len_mean: 997.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.717948717948718
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 39
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.724761962890625e-06
          cur_lr: 5.000000000000001e-05
          entropy: 2.607686241467794
          entropy_coeff: 0.009999999999999998
          kl: 0.0036505270332861293
          policy_loss: -0.003344152722921636
          total_loss: -0.002876546937558386
          vf_explained_var: 0.12802450358867645
          vf_loss: 0.026544440960666785
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,39,456.881,39000,-0.717949,0,-7,997


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-09-20_08-08-02
  done: false
  episode_len_mean: 997.075
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 40
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.862380981445312e-06
          cur_lr: 5.000000000000001e-05
          entropy: 2.634295116530524
          entropy_coeff: 0.009999999999999998
          kl: 0.0018090857303911948
          policy_loss: 0.038843577065401605
          total_loss: 0.0214884954608149
          vf_explained_var: -0.4222838580608368
          vf_loss: 0.008987864633349494
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,40,467.473,40000,-0.7,0,-7,997.075


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-09-20_08-08-12
  done: false
  episode_len_mean: 997.1463414634146
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6829268292682927
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 41
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.931190490722656e-06
          cur_lr: 5.000000000000001e-05
          entropy: 2.5879279242621527
          entropy_coeff: 0.009999999999999998
          kl: 0.00496474378313432
          policy_loss: -0.041690555794371496
          total_loss: -0.03265620097517967
          vf_explained_var: -0.08191383630037308
          vf_loss: 0.03491362768949734
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,41,478.102,41000,-0.682927,0,-7,997.146


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-09-20_08-08-23
  done: false
  episode_len_mean: 997.2142857142857
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6666666666666666
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 42
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.65595245361328e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.6238890727361044
          entropy_coeff: 0.009999999999999998
          kl: 0.005850440773731592
          policy_loss: -0.017567849283417068
          total_loss: -0.031578112062480714
          vf_explained_var: -0.41609713435173035
          vf_loss: 0.012228624954716199
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,42,488.728,42000,-0.666667,0,-7,997.214


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-09-20_08-08-34
  done: false
  episode_len_mean: 997.2790697674419
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6511627906976745
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 43
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.65595245361328e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.6103406614727445
          entropy_coeff: 0.009999999999999998
          kl: 0.0032932108870362834
          policy_loss: 0.02693237786491712
          total_loss: 0.011776618328359393
          vf_explained_var: 0.0001791881222743541
          vf_loss: 0.010947646718058321
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,43,500.142,43000,-0.651163,0,-7,997.279


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-09-20_08-08-46
  done: false
  episode_len_mean: 997.3409090909091
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6363636363636364
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 44
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.82797622680664e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.6526761452356973
          entropy_coeff: 0.009999999999999998
          kl: 0.0033307080046174825
          policy_loss: 0.02468040208849642
          total_loss: 0.001723406029244264
          vf_explained_var: -0.6630625128746033
          vf_loss: 0.0035697629739944304
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,44,511.347,44000,-0.636364,0,-7,997.341


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-09-20_08-08-57
  done: false
  episode_len_mean: 997.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6444444444444445
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 45
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.41398811340332e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.6408694081836277
          entropy_coeff: 0.009999999999999998
          kl: 0.005840725317216627
          policy_loss: 0.02731829293900066
          total_loss: 0.016376158677869374
          vf_explained_var: -0.666019082069397
          vf_loss: 0.015466557120412795
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,45,522.631,45000,-0.644444,0,-7,997.4


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-09-20_08-09-08
  done: false
  episode_len_mean: 997.4565217391304
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6304347826086957
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 46
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.41398811340332e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.6430564906862046
          entropy_coeff: 0.009999999999999998
          kl: 0.003425667961617397
          policy_loss: 0.09955087055762608
          total_loss: 0.07599424752924178
          vf_explained_var: -0.007962530478835106
          vf_loss: 0.0028739433949037147
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,46,533.468,46000,-0.630435,0,-7,997.457


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-09-20_08-09-18
  done: false
  episode_len_mean: 997.5106382978723
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6170212765957447
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 47
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.20699405670166e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.606099149915907
          entropy_coeff: 0.009999999999999998
          kl: 0.004383823368592596
          policy_loss: -0.025231662930713758
          total_loss: -0.04715385536352793
          vf_explained_var: -0.42673152685165405
          vf_loss: 0.00413879830028034
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,47,544.051,47000,-0.617021,0,-7,997.511


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-09-20_08-09-29
  done: false
  episode_len_mean: 997.5625
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6041666666666666
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 48
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.0349702835083e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.563018568356832
          entropy_coeff: 0.009999999999999998
          kl: 0.0044880991155320914
          policy_loss: -0.10218354115883509
          total_loss: -0.12624327407942879
          vf_explained_var: -0.3741062581539154
          vf_loss: 0.00157045147045412
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,48,554.258,48000,-0.604167,0,-7,997.562


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-09-20_08-09-39
  done: false
  episode_len_mean: 997.6122448979592
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5918367346938775
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 49
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.01748514175415e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.4522846115960015
          entropy_coeff: 0.009999999999999998
          kl: 0.007566481462875036
          policy_loss: -0.02095703726841344
          total_loss: -0.043783159222867754
          vf_explained_var: -0.4392518699169159
          vf_loss: 0.0016967253785373437
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,49,564.64,49000,-0.591837,0,-7,997.612


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-09-20_08-09-49
  done: false
  episode_len_mean: 997.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.58
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 50
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.01748514175415e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.396558992067973
          entropy_coeff: 0.009999999999999998
          kl: 0.009520673262798042
          policy_loss: -0.09874415770173073
          total_loss: -0.12181288715865878
          vf_explained_var: -1.0
          vf_loss: 0.0008968593895487074
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 192.168.1.100
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,50,574.928,50000,-0.58,0,-7,997.66


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-09-20_08-10-00
  done: false
  episode_len_mean: 997.7058823529412
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5686274509803921
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 51
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.01748514175415e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.49154744942983
          entropy_coeff: 0.009999999999999998
          kl: 0.0049596122725674855
          policy_loss: 0.004612130526867177
          total_loss: -0.018336312452124224
          vf_explained_var: -1.0
          vf_loss: 0.001967031121926589
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,51,585.21,51000,-0.568627,0,-7,997.706


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-09-20_08-10-10
  done: false
  episode_len_mean: 997.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5576923076923077
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 52
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.508742570877075e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.4782655821906197
          entropy_coeff: 0.009999999999999998
          kl: 0.006350309090825289
          policy_loss: 0.005345335437191857
          total_loss: -0.017293677603205045
          vf_explained_var: -0.8284940123558044
          vf_loss: 0.0021436419389728044
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,52,595.525,52000,-0.557692,0,-7,997.75


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-09-20_08-10-20
  done: false
  episode_len_mean: 997.7924528301887
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5471698113207547
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 53
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.508742570877075e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.4893258306715222
          entropy_coeff: 0.009999999999999998
          kl: 0.007009206009377205
          policy_loss: 0.017355420026514266
          total_loss: -0.0045830267998907305
          vf_explained_var: -0.5620169043540955
          vf_loss: 0.00295481108672296
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,53,605.837,53000,-0.54717,0,-7,997.792


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-09-20_08-10-31
  done: false
  episode_len_mean: 997.8333333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5370370370370371
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 54
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.508742570877075e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.4940910074445934
          entropy_coeff: 0.009999999999999998
          kl: 0.006454489497543816
          policy_loss: -0.0008093346738153033
          total_loss: -0.025087586335009997
          vf_explained_var: -1.0
          vf_loss: 0.0006626592114722977
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,54,616.187,54000,-0.537037,0,-7,997.833


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-09-20_08-10-41
  done: false
  episode_len_mean: 997.8727272727273
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5272727272727272
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 55
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.508742570877075e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.4805932389365304
          entropy_coeff: 0.009999999999999998
          kl: 0.0032320657087816012
          policy_loss: 0.08360755985809697
          total_loss: 0.05969323449664646
          vf_explained_var: -0.9249483942985535
          vf_loss: 0.0008916064023247195
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,55,626.477,55000,-0.527273,0,-7,997.873


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-09-20_08-10-51
  done: false
  episode_len_mean: 997.9107142857143
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5178571428571429
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.543712854385376e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5066241794162325
          entropy_coeff: 0.009999999999999998
          kl: 0.006547137022594433
          policy_loss: -0.022917093667719098
          total_loss: -0.04716400156418483
          vf_explained_var: -0.6797129511833191
          vf_loss: 0.0008193339075660333
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,56,636.81,56000,-0.517857,0,-7,997.911


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-09-20_08-11-02
  done: false
  episode_len_mean: 997.9473684210526
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5087719298245614
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 57
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.543712854385376e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4600851880179513
          entropy_coeff: 0.009999999999999998
          kl: 0.009322396776339737
          policy_loss: 0.07000961775581042
          total_loss: 0.04703489931093322
          vf_explained_var: -0.2718396782875061
          vf_loss: 0.0016261326149106027
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,57,647.179,57000,-0.508772,0,-7,997.947


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-09-20_08-11-12
  done: false
  episode_len_mean: 997.9827586206897
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 58
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.543712854385376e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.407454193962945
          entropy_coeff: 0.009999999999999998
          kl: 0.006754086248624246
          policy_loss: -0.04098386826614539
          total_loss: -0.06415992106000583
          vf_explained_var: -0.5033674836158752
          vf_loss: 0.0008984869688801054
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,58,657.434,58000,-0.5,0,-7,997.983


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-09-20_08-11-22
  done: false
  episode_len_mean: 998.0169491525423
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4915254237288136
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 59
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.543712854385376e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3660900433858236
          entropy_coeff: 0.009999999999999998
          kl: 0.0069634049309932385
          policy_loss: -0.011804090812802315
          total_loss: -0.03491983099116219
          vf_explained_var: -1.0
          vf_loss: 0.0005451611912576481
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,59,667.663,59000,-0.491525,0,-7,998.017




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-09-20_08-11-49
  done: false
  episode_len_mean: 995.8333333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.48333333333333334
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 60
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.543712854385376e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.341927337646484
          entropy_coeff: 0.009999999999999998
          kl: 0.0044936869117049895
          policy_loss: -0.07028319984674454
          total_loss: -0.09268478949864706
          vf_explained_var: -0.37072455883026123
          vf_loss: 0.0010176821269043204
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,60,694.469,60000,-0.483333,0,-7,995.833


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-09-20_08-12-01
  done: false
  episode_len_mean: 995.9016393442623
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.47540983606557374
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.343045269118415
          entropy_coeff: 0.009999999999999998
          kl: 0.009203696403490813
          policy_loss: 0.004568339222007328
          total_loss: -0.017843621803654564
          vf_explained_var: -1.0
          vf_loss: 0.0010184908880748684
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,61,706.642,61000,-0.47541,0,-7,995.902


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-09-20_08-12-12
  done: false
  episode_len_mean: 995.9677419354839
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.46774193548387094
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 62
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.225768786006504
          entropy_coeff: 0.009999999999999998
          kl: 0.005976327489370201
          policy_loss: 0.011694894482692083
          total_loss: -0.009778943947619863
          vf_explained_var: -1.0
          vf_loss: 0.0007838500754183365
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_restore: 62
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,62,717.128,62000,-0.467742,0,-7,995.968


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-09-20_08-12-22
  done: false
  episode_len_mean: 996.031746031746
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4603174603174603
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 63
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2695210960176255
          entropy_coeff: 0.009999999999999998
          kl: 0.008859352032280377
          policy_loss: 0.0026343086113532386
          total_loss: -0.018822953146364955
          vf_explained_var: -1.0
          vf_loss: 0.0012379492109175771
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 63
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,63,727.499,63000,-0.460317,0,-7,996.032


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-09-20_08-12-33
  done: false
  episode_len_mean: 996.09375
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.453125
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 64
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2620367871390448
          entropy_coeff: 0.009999999999999998
          kl: 0.00896426176533386
          policy_loss: -0.025816174927684996
          total_loss: -0.04783390805953079
          vf_explained_var: -1.0
          vf_loss: 0.000602634072290837
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,64,737.834,64000,-0.453125,0,-7,996.094


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-09-20_08-12-43
  done: false
  episode_len_mean: 996.1538461538462
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4461538461538462
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 65
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.140932310952081
          entropy_coeff: 0.009999999999999998
          kl: 0.0074745019041894045
          policy_loss: -0.056425204707516566
          total_loss: -0.07196475822064612
          vf_explained_var: -0.19392672181129456
          vf_loss: 0.005869763592878978
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,65,748.131,65000,-0.446154,0,-7,996.154


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-09-20_08-12-53
  done: false
  episode_len_mean: 996.2121212121212
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4393939393939394
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 66
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2290892548031276
          entropy_coeff: 0.009999999999999998
          kl: 0.007955580341179847
          policy_loss: 0.011386701050731871
          total_loss: -0.010012870033582052
          vf_explained_var: -1.0
          vf_loss: 0.000891325483745378
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,66,758.344,66000,-0.439394,0,-7,996.212


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-09-20_08-13-03
  done: false
  episode_len_mean: 996.2686567164179
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.43283582089552236
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 67
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1579929245842826
          entropy_coeff: 0.009999999999999998
          kl: 0.0062775401408358845
          policy_loss: -0.03607189647025532
          total_loss: -0.05655425894591543
          vf_explained_var: -0.8169688582420349
          vf_loss: 0.0010975623493626092
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,67,768.606,67000,-0.432836,0,-7,996.269


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-09-20_08-13-14
  done: false
  episode_len_mean: 996.3235294117648
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4264705882352941
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 68
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.270199267069499
          entropy_coeff: 0.009999999999999998
          kl: 0.010006589327236606
          policy_loss: 0.01935245560275184
          total_loss: -0.0023657381534576416
          vf_explained_var: -0.912720263004303
          vf_loss: 0.000983799349827071
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,68,778.839,68000,-0.426471,0,-7,996.324


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-09-20_08-13-24
  done: false
  episode_len_mean: 996.3768115942029
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4492753623188406
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 69
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.32478064166175
          entropy_coeff: 0.009999999999999998
          kl: 0.012718389822530598
          policy_loss: 0.037521656850973764
          total_loss: 0.05767673833502664
          vf_explained_var: -0.6081134080886841
          vf_loss: 0.04340289104147814
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,69,789.123,69000,-0.449275,0,-7,996.377


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-09-20_08-13-34
  done: false
  episode_len_mean: 996.4285714285714
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.44285714285714284
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 70
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3296514881981745
          entropy_coeff: 0.009999999999999998
          kl: 0.008392229222181078
          policy_loss: -0.05651889091564549
          total_loss: -0.07780311575366392
          vf_explained_var: -0.8348697423934937
          vf_loss: 0.0020122889804446865
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,70,799.328,70000,-0.442857,0,-7,996.429


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-09-20_08-13-44
  done: false
  episode_len_mean: 996.4788732394367
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.43661971830985913
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 71
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4168527126312256
          entropy_coeff: 0.009999999999999998
          kl: 0.009188476875546078
          policy_loss: 0.07025119724373023
          total_loss: 0.048109274274773066
          vf_explained_var: -1.0
          vf_loss: 0.002026602109738936
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_restore: 71
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,71,809.5,71000,-0.43662,0,-7,996.479


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-09-20_08-13-55
  done: false
  episode_len_mean: 996.5277777777778
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4305555555555556
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 72
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.311740851402283
          entropy_coeff: 0.009999999999999998
          kl: 0.0069783834039981426
          policy_loss: 0.037489894446399476
          total_loss: 0.016251401354869206
          vf_explained_var: -0.7034655213356018
          vf_loss: 0.0018789133846035433
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,72,819.789,72000,-0.430556,0,-7,996.528


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-09-20_08-14-05
  done: false
  episode_len_mean: 996.5753424657535
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4246575342465753
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 73
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3526579750908745
          entropy_coeff: 0.009999999999999998
          kl: 0.009866462119097955
          policy_loss: -0.040647859871387484
          total_loss: -0.060583584590090646
          vf_explained_var: -0.9168829321861267
          vf_loss: 0.0035908573617537817
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,73,830.095,73000,-0.424658,0,-7,996.575


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-09-20_08-14-15
  done: false
  episode_len_mean: 996.6216216216217
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4189189189189189
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 74
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3259214798609418
          entropy_coeff: 0.009999999999999998
          kl: 0.006409192282577752
          policy_loss: -0.053664098266098234
          total_loss: -0.07474372660120328
          vf_explained_var: -1.0
          vf_loss: 0.002179587004421693
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,74,840.274,74000,-0.418919,0,-7,996.622


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-09-20_08-14-26
  done: false
  episode_len_mean: 996.6666666666666
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.41333333333333333
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 75
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3124622080061172
          entropy_coeff: 0.009999999999999998
          kl: 0.006905447221724619
          policy_loss: -0.0025403141975402833
          total_loss: -0.02465064637362957
          vf_explained_var: -0.7017570734024048
          vf_loss: 0.0010142901207372131
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,75,850.565,75000,-0.413333,0,-7,996.667


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-09-20_08-14-36
  done: false
  episode_len_mean: 996.7105263157895
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.40789473684210525
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 76
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3317090935177274
          entropy_coeff: 0.009999999999999998
          kl: 0.005974546825499989
          policy_loss: -0.01819885340001848
          total_loss: -0.03952925900618235
          vf_explained_var: -1.0
          vf_loss: 0.0019866858350319995
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_restore: 76
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,76,860.885,76000,-0.407895,0,-7,996.711


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-09-20_08-14-46
  done: false
  episode_len_mean: 996.7532467532468
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4025974025974026
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 77
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.285791293780009
          entropy_coeff: 0.009999999999999998
          kl: 0.00896986479547035
          policy_loss: -0.04017348363995552
          total_loss: -0.061470022052526475
          vf_explained_var: -0.9925647377967834
          vf_loss: 0.0015613742851807424
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,77,871.142,77000,-0.402597,0,-7,996.753


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-09-20_08-14-56
  done: false
  episode_len_mean: 996.7948717948718
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3974358974358974
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 78
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.311977055337694
          entropy_coeff: 0.009999999999999998
          kl: 0.007199188128424201
          policy_loss: 0.015399049636390475
          total_loss: -0.005941364955570963
          vf_explained_var: -1.0
          vf_loss: 0.0017793565768645042
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since_restore: 78
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,78,881.373,78000,-0.397436,0,-7,996.795


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-09-20_08-15-07
  done: false
  episode_len_mean: 996.8354430379746
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3924050632911392
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 79
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2699741151597763
          entropy_coeff: 0.009999999999999998
          kl: 0.005093623619080922
          policy_loss: -0.021172884561949306
          total_loss: -0.041256869170400834
          vf_explained_var: -0.9520399570465088
          vf_loss: 0.0026157576551971338
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,79,891.552,79000,-0.392405,0,-7,996.835


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-09-20_08-15-17
  done: false
  episode_len_mean: 996.875
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3875
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 80
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3753440618515014
          entropy_coeff: 0.009999999999999998
          kl: 0.01164003873105253
          policy_loss: -0.012967914839585622
          total_loss: -0.03511683642864227
          vf_explained_var: -1.0
          vf_loss: 0.001604520180909377
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,80,901.722,80000,-0.3875,0,-7,996.875


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-09-20_08-15-27
  done: false
  episode_len_mean: 996.9135802469136
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.38271604938271603
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 81
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.423143574926588
          entropy_coeff: 0.009999999999999998
          kl: 0.008311323280612962
          policy_loss: 0.05834260479443603
          total_loss: 0.03505118313348955
          vf_explained_var: -1.0
          vf_loss: 0.0009400136261117748
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_restore: 81
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,81,912.092,81000,-0.382716,0,-7,996.914


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-09-20_08-15-37
  done: false
  episode_len_mean: 996.9512195121952
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3780487804878049
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 82
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.393906916512383
          entropy_coeff: 0.009999999999999998
          kl: 0.005436133746480346
          policy_loss: 0.023520473080376785
          total_loss: 0.0014544651119245424
          vf_explained_var: -0.9472568035125732
          vf_loss: 0.0018730602110736071
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,82,922.336,82000,-0.378049,0,-7,996.951


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-09-20_08-15-48
  done: false
  episode_len_mean: 996.9879518072289
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.37349397590361444
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 83
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4079736603630915
          entropy_coeff: 0.009999999999999998
          kl: 0.007251651489575018
          policy_loss: -0.05303229424688551
          total_loss: -0.07557375319302082
          vf_explained_var: -1.0
          vf_loss: 0.0015382776386104525
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore: 83
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,83,932.538,83000,-0.373494,0,-7,996.988


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-09-20_08-15-58
  done: false
  episode_len_mean: 997.0238095238095
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.36904761904761907
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 84
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4894649585088096
          entropy_coeff: 0.009999999999999998
          kl: 0.009242237107236997
          policy_loss: -0.02370550466908349
          total_loss: -0.04771795065866576
          vf_explained_var: -0.9340959191322327
          vf_loss: 0.0008822037883672036
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,84,942.794,84000,-0.369048,0,-7,997.024


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-09-20_08-16-08
  done: false
  episode_len_mean: 997.0588235294117
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.36470588235294116
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 85
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4620691935221353
          entropy_coeff: 0.009999999999999998
          kl: 0.009961547303179389
          policy_loss: -0.008217651603950394
          total_loss: -0.031658163087235555
          vf_explained_var: -1.0
          vf_loss: 0.001180177493693514
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_restore: 85
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,85,952.968,85000,-0.364706,0,-7,997.059


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-09-20_08-16-18
  done: false
  episode_len_mean: 997.0930232558139
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.36046511627906974
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 86
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4973319186104668
          entropy_coeff: 0.009999999999999998
          kl: 0.006549659840126528
          policy_loss: -0.02188535432020823
          total_loss: -0.04248883260620965
          vf_explained_var: -0.5554831624031067
          vf_loss: 0.004369839808593194
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,86,963.113,86000,-0.360465,0,-7,997.093


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-09-20_08-16-29
  done: false
  episode_len_mean: 997.1264367816092
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3563218390804598
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 87
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4973976479636297
          entropy_coeff: 0.009999999999999998
          kl: 0.007758554512635197
          policy_loss: 0.021103145678838094
          total_loss: -0.0014205592374006907
          vf_explained_var: -0.9979952573776245
          vf_loss: 0.002450269859077202
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,87,973.31,87000,-0.356322,0,-7,997.126


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-09-20_08-16-39
  done: false
  episode_len_mean: 997.1590909090909
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3522727272727273
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 88
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4578857925203113
          entropy_coeff: 0.009999999999999998
          kl: 0.008620168077159502
          policy_loss: -0.011294388481312327
          total_loss: -0.03446149014764362
          vf_explained_var: -0.6831714510917664
          vf_loss: 0.0014117568640762733
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,88,983.566,88000,-0.352273,0,-7,997.159


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-09-20_08-16-49
  done: false
  episode_len_mean: 997.1910112359551
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.34831460674157305
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 89
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3884135431713527
          entropy_coeff: 0.009999999999999998
          kl: 0.010777655559550285
          policy_loss: -0.03544772697819604
          total_loss: -0.057188831683662206
          vf_explained_var: -0.3412262499332428
          vf_loss: 0.0021430302010331716
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,89,993.882,89000,-0.348315,0,-7,997.191




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-09-20_08-17-19
  done: false
  episode_len_mean: 995.6888888888889
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.34444444444444444
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 90
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3614973889456854
          entropy_coeff: 0.009999999999999998
          kl: 0.00947310225095436
          policy_loss: -0.019171862511171235
          total_loss: -0.04046649742457602
          vf_explained_var: -0.3211243152618408
          vf_loss: 0.002320340954853843
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,90,1023.53,90000,-0.344444,0,-7,995.689


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-09-20_08-17-29
  done: false
  episode_len_mean: 995.7362637362637
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.34065934065934067
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 91
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.336166485150655
          entropy_coeff: 0.009999999999999998
          kl: 0.009497544788770303
          policy_loss: -0.005156326790650686
          total_loss: -0.027258215182357364
          vf_explained_var: -0.5011658072471619
          vf_loss: 0.0012597763910889626
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,91,1034.09,91000,-0.340659,0,-7,995.736


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-09-20_08-17-40
  done: false
  episode_len_mean: 995.7826086956521
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.33695652173913043
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 92
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3213805701997545
          entropy_coeff: 0.009999999999999998
          kl: 0.013437086693134306
          policy_loss: -0.020886100269854067
          total_loss: -0.04268429506984022
          vf_explained_var: -0.15170928835868835
          vf_loss: 0.001415611332696345
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,92,1044.72,92000,-0.336957,0,-7,995.783


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-09-20_08-17-51
  done: false
  episode_len_mean: 995.8279569892474
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 93
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.310570987065633
          entropy_coeff: 0.009999999999999998
          kl: 0.005549898433668692
          policy_loss: -0.01713656120830112
          total_loss: -0.03518726598057482
          vf_explained_var: -0.4510866701602936
          vf_loss: 0.005055005504982546
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,93,1055.4,93000,-0.333333,0,-7,995.828


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-09-20_08-18-02
  done: false
  episode_len_mean: 995.8723404255319
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.32978723404255317
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 94
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.298720587624444
          entropy_coeff: 0.009999999999999998
          kl: 0.009597911752083712
          policy_loss: -0.05044111348688603
          total_loss: -0.0700981990330749
          vf_explained_var: -0.9952139258384705
          vf_loss: 0.003330121065179507
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,94,1066.97,94000,-0.329787,0,-7,995.872


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-09-20_08-18-14
  done: false
  episode_len_mean: 995.9157894736842
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3263157894736842
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 95
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.390142363972134
          entropy_coeff: 0.009999999999999998
          kl: 0.008053898306118414
          policy_loss: -0.07536174398329523
          total_loss: -0.09714563803540335
          vf_explained_var: -0.10428398102521896
          vf_loss: 0.0021175319264228974
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,95,1078.45,95000,-0.326316,0,-7,995.916


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-09-20_08-18-25
  done: false
  episode_len_mean: 995.9583333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3229166666666667
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 96
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4035850445429485
          entropy_coeff: 0.009999999999999998
          kl: 0.0062040300603096385
          policy_loss: -0.05837814536773496
          total_loss: -0.0775292608473036
          vf_explained_var: -0.4916500151157379
          vf_loss: 0.004884733422659338
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,96,1089.54,96000,-0.322917,0,-7,995.958


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-09-20_08-18-36
  done: false
  episode_len_mean: 996.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31958762886597936
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 97
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4282044993506537
          entropy_coeff: 0.009999999999999998
          kl: 0.007944600790457043
          policy_loss: -0.05792387790150112
          total_loss: -0.07877527756823434
          vf_explained_var: 0.04478144273161888
          vf_loss: 0.003430644024370445
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_restore: 97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,97,1100.33,97000,-0.319588,0,-7,996


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-09-20_08-18-47
  done: false
  episode_len_mean: 996.0408163265306
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3163265306122449
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 98
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4130851533677844
          entropy_coeff: 0.009999999999999998
          kl: 0.01086137503349489
          policy_loss: -0.06538792457431555
          total_loss: -0.08698751351071729
          vf_explained_var: 0.04233058914542198
          vf_loss: 0.002531261320432855
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,98,1111.74,98000,-0.316327,0,-7,996.041


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-09-20_08-18-58
  done: false
  episode_len_mean: 996.0808080808081
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31313131313131315
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 99
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.771856427192688e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.459831131829156
          entropy_coeff: 0.009999999999999998
          kl: 0.00492285153252995
          policy_loss: -0.03830772216121356
          total_loss: -0.058446320394674935
          vf_explained_var: 0.2543487250804901
          vf_loss: 0.004459710992200093
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,99,1122.82,99000,-0.313131,0,-7,996.081


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-09-20_08-19-10
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 100
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.407797100808885
          entropy_coeff: 0.009999999999999998
          kl: 0.009938065341927625
          policy_loss: 0.021529551388488874
          total_loss: 0.0004727210021681256
          vf_explained_var: -0.9533372521400452
          vf_loss: 0.003021141794872367
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,100,1134.21,100000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-09-20_08-19-21
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 101
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.166332573360867
          entropy_coeff: 0.009999999999999998
          kl: 0.01071262888117409
          policy_loss: -0.059672012304266296
          total_loss: -0.07673635184764863
          vf_explained_var: -0.3182019591331482
          vf_loss: 0.004598987041713877
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,101,1145.55,101000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-09-20_08-19-33
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 102
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4912734005186294
          entropy_coeff: 0.009999999999999998
          kl: 0.007961392025452657
          policy_loss: -0.004550357825226254
          total_loss: -0.027579569588932727
          vf_explained_var: -0.8101011514663696
          vf_loss: 0.001883521488505519
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,102,1156.9,102000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-09-20_08-19-43
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 103
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5974363062116836
          entropy_coeff: 0.009999999999999998
          kl: 0.005813186456988846
          policy_loss: 0.04096648229493035
          total_loss: 0.016844936501648693
          vf_explained_var: -0.8718859553337097
          vf_loss: 0.0018528192148854336
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,103,1167.67,103000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-09-20_08-19-54
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 104
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3487610896428426
          entropy_coeff: 0.009999999999999998
          kl: 0.007678505016726649
          policy_loss: -0.08207566307650672
          total_loss: -0.10298087443742487
          vf_explained_var: -0.7661811709403992
          vf_loss: 0.002582395786885172
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,104,1178.49,104000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-09-20_08-20-05
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 105
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2409796635309855
          entropy_coeff: 0.009999999999999998
          kl: 0.011619050387981547
          policy_loss: -0.019252650688091913
          total_loss: -0.039628353011276984
          vf_explained_var: -0.5301523208618164
          vf_loss: 0.0020340911423166593
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,105,1189.3,105000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-09-20_08-20-16
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 106
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2928194284439085
          entropy_coeff: 0.009999999999999998
          kl: 0.009670750261164922
          policy_loss: -0.013915000690354241
          total_loss: -0.03567327401704258
          vf_explained_var: -0.3911290168762207
          vf_loss: 0.0011699161421145415
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,106,1200.08,106000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-09-20_08-20-27
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 107
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4707800971137153
          entropy_coeff: 0.009999999999999998
          kl: 0.009796295360820098
          policy_loss: -0.01419802059729894
          total_loss: -0.03721649216281043
          vf_explained_var: -0.9703497886657715
          vf_loss: 0.0016893265096263753
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,107,1211.68,107000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-09-20_08-20-39
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 108
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.460733411047194
          entropy_coeff: 0.009999999999999998
          kl: 0.008936316653784833
          policy_loss: -0.014189276264773475
          total_loss: -0.03538565817806456
          vf_explained_var: -0.5804853439331055
          vf_loss: 0.003410951983338843
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,108,1222.87,108000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-09-20_08-20-49
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 109
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5318707492616443
          entropy_coeff: 0.009999999999999998
          kl: 0.007896944803522644
          policy_loss: -0.05707492006735669
          total_loss: -0.07614329257566067
          vf_explained_var: -0.4392918348312378
          vf_loss: 0.006250337845024963
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,109,1233.55,109000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-09-20_08-21-00
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 110
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5249687088860404
          entropy_coeff: 0.009999999999999998
          kl: 0.005431909518442287
          policy_loss: -0.021516631957557465
          total_loss: -0.04512176480558183
          vf_explained_var: -0.27421262860298157
          vf_loss: 0.0016445547704481416
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,110,1244.41,110000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-09-20_08-21-11
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 111
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.563526082038879
          entropy_coeff: 0.009999999999999998
          kl: 0.008941194322795158
          policy_loss: -0.06758573887248834
          total_loss: -0.08726409073505137
          vf_explained_var: -0.7841947674751282
          vf_loss: 0.005956912353738315
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,111,1255.28,111000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-09-20_08-21-23
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 112
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3846345716052584
          entropy_coeff: 0.009999999999999998
          kl: 0.010708488648727057
          policy_loss: -0.05965133814348115
          total_loss: -0.08130778479907248
          vf_explained_var: 0.29149097204208374
          vf_loss: 0.0021898989927851492
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,112,1266.62,112000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-09-20_08-21-34
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 113
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5292024930318195
          entropy_coeff: 0.009999999999999998
          kl: 0.005243174652156673
          policy_loss: -0.10319898492760128
          total_loss: -0.12613017608722052
          vf_explained_var: -0.7759581804275513
          vf_loss: 0.0023608340467843746
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,113,1277.89,113000,-0.31,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-09-20_08-21-45
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.27
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 114
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.521559739112854
          entropy_coeff: 0.009999999999999998
          kl: 0.009452891556116493
          policy_loss: -0.09557699664599366
          total_loss: -0.1180275296792388
          vf_explained_var: 0.2908668518066406
          vf_loss: 0.002765064244158566
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,114,1288.69,114000,-0.27,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-09-20_08-21-56
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.27
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 115
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.451542928483751
          entropy_coeff: 0.009999999999999998
          kl: 0.010848186253285045
          policy_loss: -0.0478959514035119
          total_loss: -0.07100737326674991
          vf_explained_var: 0.0582311637699604
          vf_loss: 0.0014040043823317521
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,115,1299.78,115000,-0.27,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-09-20_08-22-07
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.26
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 116
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.321795916557312
          entropy_coeff: 0.009999999999999998
          kl: 0.010181037822126548
          policy_loss: -0.10169545950161087
          total_loss: -0.12291962603727977
          vf_explained_var: 0.05604342743754387
          vf_loss: 0.0019937909583354163
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,116,1310.55,116000,-0.26,0,-7,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-09-20_08-22-18
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 117
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.376545940505134
          entropy_coeff: 0.009999999999999998
          kl: 0.013237263341726329
          policy_loss: -0.007206623173422284
          total_loss: -0.0281035249431928
          vf_explained_var: -0.47286251187324524
          vf_loss: 0.002868556986666388
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,117,1321.71,117000,-0.19,0,-5,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-09-20_08-22-29
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.17
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 118
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4730664094289145
          entropy_coeff: 0.009999999999999998
          kl: 0.007256819351579555
          policy_loss: -0.09269964901937379
          total_loss: -0.11517847213480208
          vf_explained_var: 0.10354287922382355
          vf_loss: 0.002251840627286583
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,118,1332.96,118000,-0.17,0,-5,996.12


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-09-20_08-22-41
  done: false
  episode_len_mean: 996.12
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 119
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5283557494481403
          entropy_coeff: 0.009999999999999998
          kl: 0.0091202894687537
          policy_loss: 0.029273948156171375
          total_loss: 0.0067429322335455155
          vf_explained_var: -0.2846772074699402
          vf_loss: 0.002752542248668356
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,119,1344.64,119000,-0.12,0,-2,996.12




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-09-20_08-23-10
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 120
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.53196083439721
          entropy_coeff: 0.009999999999999998
          kl: 0.007216053159211766
          policy_loss: 0.017658105492591857
          total_loss: -0.005680787563323975
          vf_explained_var: -0.3531745374202728
          vf_loss: 0.001980716921389103
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,120,1373.45,120000,-0.12,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-09-20_08-23-22
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 121
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.481657531526354
          entropy_coeff: 0.009999999999999998
          kl: 0.008435878821530057
          policy_loss: -0.09491962186164327
          total_loss: -0.11757861582769288
          vf_explained_var: -0.054260995239019394
          vf_loss: 0.0021575798047706483
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,121,1386.24,121000,-0.12,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-09-20_08-23-34
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.11
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 122
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4536408556832208
          entropy_coeff: 0.009999999999999998
          kl: 0.007120768245653656
          policy_loss: -0.14534527775314118
          total_loss: -0.1679513629939821
          vf_explained_var: -0.8324922323226929
          vf_loss: 0.0019303242236168847
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,122,1397.44,122000,-0.11,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-09-20_08-23-44
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.11
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 123
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4102812502119275
          entropy_coeff: 0.009999999999999998
          kl: 0.007508642071127407
          policy_loss: -0.07172692451212141
          total_loss: -0.09430113616916869
          vf_explained_var: -0.0654960349202156
          vf_loss: 0.0015286014874517503
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,123,1408.15,123000,-0.11,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-09-20_08-23-55
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.11
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 124
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4685685820049708
          entropy_coeff: 0.009999999999999998
          kl: 0.010960985495339848
          policy_loss: -0.03624789483017392
          total_loss: -0.05943827960226271
          vf_explained_var: 0.029352815821766853
          vf_loss: 0.0014953026910208994
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,124,1419.04,124000,-0.11,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-09-20_08-24-06
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.11
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 125
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.36182119846344
          entropy_coeff: 0.009999999999999998
          kl: 0.00909957723469981
          policy_loss: 0.011405403249793583
          total_loss: -0.008481340772575802
          vf_explained_var: -0.28132882714271545
          vf_loss: 0.003731466928083036
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,125,1429.88,125000,-0.11,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-09-20_08-24-17
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.11
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 126
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3566361559761897
          entropy_coeff: 0.009999999999999998
          kl: 0.010086306778129245
          policy_loss: -0.043920949432584976
          total_loss: -0.06572769482930502
          vf_explained_var: 0.4685540497303009
          vf_loss: 0.0017596160152202678
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,126,1440.45,126000,-0.11,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-09-20_08-24-27
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.11
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 127
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.256294083595276
          entropy_coeff: 0.009999999999999998
          kl: 0.007650280629792622
          policy_loss: -0.0760901118732161
          total_loss: -0.0946580182760954
          vf_explained_var: -0.057296041399240494
          vf_loss: 0.003995035354617155
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,127,1451.02,127000,-0.11,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-09-20_08-24-38
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 128
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3842038684421114
          entropy_coeff: 0.009999999999999998
          kl: 0.01338531042120326
          policy_loss: 0.08450147766206
          total_loss: 0.06240774289601379
          vf_explained_var: 0.14106975495815277
          vf_loss: 0.001748302799468446
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,128,1461.7,128000,-0.1,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-09-20_08-24-49
  done: false
  episode_len_mean: 994.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 129
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4835367812050713
          entropy_coeff: 0.009999999999999998
          kl: 0.010977404613042117
          policy_loss: 0.03789378661248419
          total_loss: 0.014125591930415895
          vf_explained_var: -0.10052572935819626
          vf_loss: 0.0010671721087419429
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,129,1472.29,129000,-0.1,0,-2,994.67


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-09-20_08-24-59
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.08
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 130
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3506083541446263
          entropy_coeff: 0.009999999999999998
          kl: 0.015150318932573542
          policy_loss: 0.12736796364188194
          total_loss: 0.10672159675094817
          vf_explained_var: 0.5085309743881226
          vf_loss: 0.00285971449338831
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,130,1482.89,130000,-0.08,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-09-20_08-25-10
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 131
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3464336580700342
          entropy_coeff: 0.009999999999999998
          kl: 0.008941185776151765
          policy_loss: -0.16854503854281372
          total_loss: -0.1897607916345199
          vf_explained_var: 0.3250294029712677
          vf_loss: 0.0022485808689250712
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,131,1493.56,131000,-0.06,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-09-20_08-25-21
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 132
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.143596926000383
          entropy_coeff: 0.009999999999999998
          kl: 0.00987171896115421
          policy_loss: -0.14958832123213345
          total_loss: -0.1693976913889249
          vf_explained_var: 0.06044469401240349
          vf_loss: 0.001626598578877747
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,132,1504.25,132000,-0.05,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-09-20_08-25-31
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 133
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1600682020187376
          entropy_coeff: 0.009999999999999998
          kl: 0.011992740191504146
          policy_loss: -0.0203172889434629
          total_loss: -0.025629706722166804
          vf_explained_var: 0.14428938925266266
          vf_loss: 0.01628826366375304
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,133,1514.9,133000,-0.05,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-09-20_08-25-42
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 134
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2506763484742907
          entropy_coeff: 0.009999999999999998
          kl: 0.013779953460893602
          policy_loss: 0.018953884310192532
          total_loss: -0.0006148088309499952
          vf_explained_var: 0.021052565425634384
          vf_loss: 0.0029380670787456137
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,134,1525.29,134000,-0.04,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-09-20_08-25-52
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 135
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.206509155697293
          entropy_coeff: 0.009999999999999998
          kl: 0.011449207505250393
          policy_loss: 0.0554154252840413
          total_loss: 0.044023107985655466
          vf_explained_var: -0.612646758556366
          vf_loss: 0.010672773576031129
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,135,1535.74,135000,-0.05,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-09-20_08-26-03
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 136
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2448644320170086
          entropy_coeff: 0.009999999999999998
          kl: 0.012273749478963009
          policy_loss: -0.1088518454796738
          total_loss: -0.12942870598700312
          vf_explained_var: 0.3179677724838257
          vf_loss: 0.0018717825930151674
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,136,1546.15,136000,-0.05,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-09-20_08-26-13
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 137
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.290715585814582
          entropy_coeff: 0.009999999999999998
          kl: 0.009298926886234524
          policy_loss: -0.0019533632530106437
          total_loss: -0.02397676259279251
          vf_explained_var: 0.11931979656219482
          vf_loss: 0.000883755457147749
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,137,1556.53,137000,-0.05,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-09-20_08-26-24
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 138
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3273967848883736
          entropy_coeff: 0.009999999999999998
          kl: 0.006901245951107882
          policy_loss: -0.11992142908275127
          total_loss: -0.14213847004705005
          vf_explained_var: -0.14774833619594574
          vf_loss: 0.0010569248164150242
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,138,1567.01,138000,-0.05,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-09-20_08-26-34
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 139
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2496753613154095
          entropy_coeff: 0.009999999999999998
          kl: 0.009769380593781563
          policy_loss: -0.08147991672158242
          total_loss: -0.10285244294338756
          vf_explained_var: -0.24912908673286438
          vf_loss: 0.001124224121061464
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,139,1577.53,139000,-0.04,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-09-20_08-26-45
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 140
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.209455225202772
          entropy_coeff: 0.009999999999999998
          kl: 0.013067836652178795
          policy_loss: -0.1071362187465032
          total_loss: -0.1260456054781874
          vf_explained_var: -0.24088501930236816
          vf_loss: 0.0031851650965917444
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,140,1587.95,140000,-0.04,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-09-20_08-26-55
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 141
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.179179220729404
          entropy_coeff: 0.009999999999999998
          kl: 0.010212867164202758
          policy_loss: -0.0896868345224195
          total_loss: -0.10928364404373699
          vf_explained_var: -0.6411134600639343
          vf_loss: 0.0021949810709985386
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,141,1598.51,141000,-0.04,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-09-20_08-27-06
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 142
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1212486346562702
          entropy_coeff: 0.009999999999999998
          kl: 0.010917696674493547
          policy_loss: -0.001044074652923478
          total_loss: -0.02071539101501306
          vf_explained_var: -1.0
          vf_loss: 0.0015411694294824782
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,142,1609.07,142000,-0.04,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-09-20_08-27-16
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 143
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1395360284381444
          entropy_coeff: 0.009999999999999998
          kl: 0.011996070296755857
          policy_loss: -0.06037859258552392
          total_loss: -0.08054419234395027
          vf_explained_var: -0.6089682579040527
          vf_loss: 0.0012297586647845391
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,143,1619.56,143000,-0.04,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-09-20_08-27-27
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.04
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 144
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.144549420144823
          entropy_coeff: 0.009999999999999998
          kl: 0.012865754111114965
          policy_loss: -0.03366266820165846
          total_loss: -0.05403324001365238
          vf_explained_var: -0.7257843017578125
          vf_loss: 0.0010749237003943159
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 144
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,144,1630.05,144000,-0.04,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-09-20_08-27-37
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 145
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0048095451460943
          entropy_coeff: 0.009999999999999998
          kl: 0.00807733539850659
          policy_loss: 0.10506142543421851
          total_loss: 0.08626478910446167
          vf_explained_var: -0.627723753452301
          vf_loss: 0.0012514573159731097
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_since_restore: 145
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,145,1640.53,145000,-0.03,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-09-20_08-27-48
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 146
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8840147654215496
          entropy_coeff: 0.009999999999999998
          kl: 0.011834085839901299
          policy_loss: -0.006128891474670834
          total_loss: -0.022628596052527428
          vf_explained_var: -0.9893885254859924
          vf_loss: 0.0023404425566291645
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_since_restore: 146
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,146,1650.99,146000,-0.03,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-09-20_08-27-58
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 147
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0320396264394125
          entropy_coeff: 0.009999999999999998
          kl: 0.012759966311192155
          policy_loss: 0.038691465204788576
          total_loss: 0.01979020072354211
          vf_explained_var: -0.8683523535728455
          vf_loss: 0.0014191312728346222
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_since_restore: 147
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,147,1661.47,147000,-0.03,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-09-20_08-28-09
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 148
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.022431939178043
          entropy_coeff: 0.009999999999999998
          kl: 0.009262098908400832
          policy_loss: 0.04817235602272881
          total_loss: 0.03044785120420986
          vf_explained_var: -0.5675338506698608
          vf_loss: 0.002499809752528866
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 148
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,148,1671.95,148000,-0.03,0,-2,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-09-20_08-28-19
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 149
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8411128520965576
          entropy_coeff: 0.009999999999999998
          kl: 0.012473689916192437
          policy_loss: 0.03725215097268422
          total_loss: 0.020581975496477552
          vf_explained_var: 0.20229876041412354
          vf_loss: 0.001740952266845852
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_since_restore: 149
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,149,1682.41,149000,-0.03,0,-2,995.84




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-09-20_08-28-47
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 150
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.697166657447815
          entropy_coeff: 0.009999999999999998
          kl: 0.010253536281738937
          policy_loss: 0.045269113034009933
          total_loss: 0.030438541372617086
          vf_explained_var: -0.471422016620636
          vf_loss: 0.0021410984782657275
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 150
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,150,1710.62,150000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-09-20_08-28-58
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 151
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7025994724697537
          entropy_coeff: 0.009999999999999998
          kl: 0.006432487398938767
          policy_loss: 0.0036093667149543762
          total_loss: -0.011025957928763496
          vf_explained_var: -0.7287224531173706
          vf_loss: 0.002390671381726861
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_since_restore: 151
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,151,1720.72,151000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-09-20_08-29-08
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 152
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8783858007854886
          entropy_coeff: 0.009999999999999998
          kl: 0.013936477993359
          policy_loss: 0.10666710974441634
          total_loss: 0.08989093229174613
          vf_explained_var: -0.22379854321479797
          vf_loss: 0.002007682747595633
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 152
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,152,1731.21,152000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-09-20_08-29-19
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 153
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.928761891523997
          entropy_coeff: 0.009999999999999998
          kl: 0.008961156287108293
          policy_loss: 0.04052633022268613
          total_loss: 0.023842195007536145
          vf_explained_var: -0.5684307813644409
          vf_loss: 0.0026034868645688726
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_since_restore: 153
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,153,1742.41,153000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-09-20_08-29-29
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 154
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9069038391113282
          entropy_coeff: 0.009999999999999998
          kl: 0.008835459813039521
          policy_loss: -0.1281103394097752
          total_loss: -0.14622064580519994
          vf_explained_var: -0.5542846322059631
          vf_loss: 0.0009587336984825217
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_since_restore: 154
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,154,1752.41,154000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-09-20_08-29-39
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 155
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0331263767348395
          entropy_coeff: 0.009999999999999998
          kl: 0.009951206664902538
          policy_loss: -0.008941985956496663
          total_loss: -0.028210800306664573
          vf_explained_var: -0.2712998390197754
          vf_loss: 0.0010624469267592454
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_since_restore: 155
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,155,1762.26,155000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-09-20_08-29-49
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 156
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.854465937614441
          entropy_coeff: 0.009999999999999998
          kl: 0.010563008416927536
          policy_loss: -0.05971614900562498
          total_loss: -0.07668500070770581
          vf_explained_var: -0.19832418859004974
          vf_loss: 0.0015758118369073296
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_since_restore: 156
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,156,1772.42,156000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-09-20_08-29-59
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 157
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.931015510029263
          entropy_coeff: 0.009999999999999998
          kl: 0.010701728258007828
          policy_loss: -0.1580250684171915
          total_loss: -0.1763134772164954
          vf_explained_var: -0.3803372383117676
          vf_loss: 0.0010217454963518926
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations_since_restore: 157
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,157,1782.43,157000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-09-20_08-30-10
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 158
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7057416399319967
          entropy_coeff: 0.009999999999999998
          kl: 0.007635152878485791
          policy_loss: -0.01528987805876467
          total_loss: -0.030321963710917367
          vf_explained_var: -0.5940144658088684
          vf_loss: 0.002025331723658989
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations_since_restore: 158
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,158,1792.62,158000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-09-20_08-30-19
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 159
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9323585642708672
          entropy_coeff: 0.009999999999999998
          kl: 0.00679419075021287
          policy_loss: -0.014811297009388606
          total_loss: -0.032825601514842775
          vf_explained_var: -0.3977850079536438
          vf_loss: 0.0013092828093148354
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_since_restore: 159
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,159,1802.33,159000,-0.03,0,-2,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-09-20_08-30-30
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 160
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.709266726175944
          entropy_coeff: 0.009999999999999998
          kl: 0.011849946770960271
          policy_loss: 0.03635709020826552
          total_loss: 0.02265868985818492
          vf_explained_var: -0.5302795171737671
          vf_loss: 0.0033942697783155987
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 160
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,160,1812.49,160000,-0.03,0,-2,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-09-20_08-30-39
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 161
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.031524529722002
          entropy_coeff: 0.009999999999999998
          kl: 0.016136161397814694
          policy_loss: 0.05047281897730298
          total_loss: 0.03388754435711437
          vf_explained_var: -0.21702606976032257
          vf_loss: 0.003729969645953841
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_since_restore: 161
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,161,1822.4,161000,-0.03,0,-2,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-09-20_08-30-49
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 162
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8225414130422803
          entropy_coeff: 0.009999999999999998
          kl: 0.007339275940083469
          policy_loss: 0.03949136982361476
          total_loss: 0.022887613707118563
          vf_explained_var: -0.9830220341682434
          vf_loss: 0.0016216558119696047
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_since_restore: 162
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,162,1832.27,162000,-0.03,0,-2,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-09-20_08-30-59
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 163
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1643920527564156
          entropy_coeff: 0.009999999999999998
          kl: 0.006886587468833532
          policy_loss: 0.044815319610966575
          total_loss: 0.024910272740655475
          vf_explained_var: -0.3528602719306946
          vf_loss: 0.0017388750514429477
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_since_restore: 163
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,163,1842.33,163000,-0.03,0,-2,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-09-20_08-31-10
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 164
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0015248974164326
          entropy_coeff: 0.009999999999999998
          kl: 0.00900628923263447
          policy_loss: 0.06433816916412777
          total_loss: 0.04586701513164573
          vf_explained_var: -0.05829077959060669
          vf_loss: 0.0015440976570567323
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_since_restore: 164
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,164,1852.45,164000,-0.03,0,-2,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-09-20_08-31-20
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 165
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0489945398436653
          entropy_coeff: 0.009999999999999998
          kl: 0.009430453485419873
          policy_loss: 0.028449697047472
          total_loss: 0.010367792430851194
          vf_explained_var: 0.06774420291185379
          vf_loss: 0.002408039236130814
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterations_since_restore: 165
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,165,1862.72,165000,-0.03,0,-2,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-09-20_08-31-30
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 166
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0162402987480164
          entropy_coeff: 0.009999999999999998
          kl: 0.010650142933445681
          policy_loss: -0.06921524935298495
          total_loss: -0.08724224724703365
          vf_explained_var: -0.32626304030418396
          vf_loss: 0.002135405090585765
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 166
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,166,1872.74,166000,-0.03,0,-2,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-09-20_08-31-40
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 167
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0637549665239123
          entropy_coeff: 0.009999999999999998
          kl: 0.005865713156252106
          policy_loss: -0.09443898503151205
          total_loss: -0.11279116057687336
          vf_explained_var: -0.7848948240280151
          vf_loss: 0.0022853735187608336
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations_since_restore: 167
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,167,1882.91,167000,-0.03,0,-2,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-09-20_08-31-50
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.03
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 168
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.009261128637526
          entropy_coeff: 0.009999999999999998
          kl: 0.00854285635255615
          policy_loss: -0.038546573867400485
          total_loss: -0.057449499600463444
          vf_explained_var: 0.27705129981040955
          vf_loss: 0.0011896852804865275
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 168
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,168,1893.17,168000,-0.03,0,-2,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-09-20_08-32-01
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 169
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2485204935073853
          entropy_coeff: 0.009999999999999998
          kl: 0.009245189221056115
          policy_loss: 0.06970644891262054
          total_loss: 0.04893759406275219
          vf_explained_var: -0.4373665153980255
          vf_loss: 0.0017163488819884757
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations_since_restore: 169
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,169,1903.41,169000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-09-20_08-32-11
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 170
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0617015110121835
          entropy_coeff: 0.009999999999999998
          kl: 0.005143946470730719
          policy_loss: -0.07652996364567015
          total_loss: -0.09479184974398878
          vf_explained_var: -0.35004207491874695
          vf_loss: 0.002355129480646509
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 170
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,170,1913.49,170000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-09-20_08-32-21
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 171
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9662905004289415
          entropy_coeff: 0.009999999999999998
          kl: 0.010018537482064991
          policy_loss: -0.06077228126426538
          total_loss: -0.07819289486441347
          vf_explained_var: -0.7911914587020874
          vf_loss: 0.0022422926707400216
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_since_restore: 171
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,171,1923.48,171000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-09-20_08-32-31
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 172
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9080730398495993
          entropy_coeff: 0.009999999999999998
          kl: 0.006925906335424041
          policy_loss: -0.039150238119893606
          total_loss: -0.05638300933771663
          vf_explained_var: -1.0
          vf_loss: 0.0018479586080906705
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 172
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,172,1933.58,172000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-09-20_08-32-41
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 173
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8249986688296
          entropy_coeff: 0.009999999999999998
          kl: 0.007627164796434233
          policy_loss: -0.04270695013304551
          total_loss: -0.05973047816918956
          vf_explained_var: -0.6725946068763733
          vf_loss: 0.0012264596945088771
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterations_since_restore: 173
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,173,1943.76,173000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-09-20_08-32-51
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 174
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8058018591668872
          entropy_coeff: 0.009999999999999998
          kl: 0.005699818053959997
          policy_loss: -0.05038702653514014
          total_loss: -0.06702649494012197
          vf_explained_var: -0.9999088048934937
          vf_loss: 0.001418552723609739
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_since_restore: 174
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,174,1953.88,174000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-09-20_08-33-01
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 175
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9335892743534513
          entropy_coeff: 0.009999999999999998
          kl: 0.008668825360535573
          policy_loss: -0.03736035231914785
          total_loss: -0.054838732671406536
          vf_explained_var: -0.8436933159828186
          vf_loss: 0.0018575120264560812
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterations_since_restore: 175
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,175,1963.91,175000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-09-20_08-33-11
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 176
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7783190184169346
          entropy_coeff: 0.009999999999999998
          kl: 0.010047696986673849
          policy_loss: -0.007073184475302696
          total_loss: -0.02311445830596818
          vf_explained_var: -1.0
          vf_loss: 0.0017419155687093736
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 176
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,176,1973.93,176000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-09-20_08-33-21
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 177
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7329726762241788
          entropy_coeff: 0.009999999999999998
          kl: 0.007181941080705675
          policy_loss: 0.024278182577755717
          total_loss: 0.008006243407726288
          vf_explained_var: -0.7575583457946777
          vf_loss: 0.0010577907604682776
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iterations_since_restore: 177
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,177,1984.02,177000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-09-20_08-33-32
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 178
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.5324526720576817
          entropy_coeff: 0.009999999999999998
          kl: 0.0056427485046082635
          policy_loss: -0.06298537010120021
          total_loss: -0.07747171603971058
          vf_explained_var: -0.5808159112930298
          vf_loss: 0.0008381795157523205
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations_since_restore: 178
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,178,1994.22,178000,-0.01,0,-1,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-09-20_08-33-42
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 179
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.885928213596344e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7091769364145066
          entropy_coeff: 0.009999999999999998
          kl: 0.004875399914943457
          policy_loss: -0.022072459591759575
          total_loss: -0.03760491121146414
          vf_explained_var: -0.4207025170326233
          vf_loss: 0.0015593189552115898
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iterations_since_restore: 179
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,179,2004.62,179000,-0.01,0,-1,995.81




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-09-20_08-34-09
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 180
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.66976248688168
          entropy_coeff: 0.009999999999999998
          kl: 0.00750395816692316
          policy_loss: -0.058999758544895385
          total_loss: -0.07428325861692428
          vf_explained_var: -0.3374709486961365
          vf_loss: 0.001414123553937922
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 180
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,180,2031.26,180000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-09-20_08-34-20
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 181
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6754596577750311
          entropy_coeff: 0.009999999999999998
          kl: 0.0076887043196217225
          policy_loss: -0.012537481304672029
          total_loss: -0.027832083900769553
          vf_explained_var: -0.5396286249160767
          vf_loss: 0.00145999182584799
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 181
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,181,2042.53,181000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-09-20_08-34-30
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 182
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.7885844786961873
          entropy_coeff: 0.009999999999999998
          kl: 0.006613154015048275
          policy_loss: 0.07794097264607748
          total_loss: 0.06150087784561846
          vf_explained_var: -0.39964836835861206
          vf_loss: 0.001445751596798396
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_since_restore: 182
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,182,2052.34,182000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-09-20_08-34-40
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 183
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.725207633442349
          entropy_coeff: 0.009999999999999998
          kl: 0.007045241866112168
          policy_loss: -0.0452356970972485
          total_loss: -0.06109626938899358
          vf_explained_var: -0.3046603202819824
          vf_loss: 0.0013915043945113817
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iterations_since_restore: 183
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,183,2061.96,183000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-09-20_08-34-49
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 184
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6762632091840108
          entropy_coeff: 0.009999999999999998
          kl: 0.006744297719715359
          policy_loss: 0.02709362900091542
          total_loss: 0.010954050429993206
          vf_explained_var: -0.6021944880485535
          vf_loss: 0.0006230485923070875
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 184
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,184,2071.56,184000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-09-20_08-34-59
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 185
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.8285709685749478
          entropy_coeff: 0.009999999999999998
          kl: 0.009952311876827243
          policy_loss: -0.008421013462874624
          total_loss: -0.026055426109168264
          vf_explained_var: -0.7576141953468323
          vf_loss: 0.0006512951765519877
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iterations_since_restore: 185
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,185,2080.97,185000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-09-20_08-35-08
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 186
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6682938920127022
          entropy_coeff: 0.009999999999999998
          kl: 0.00981139065730261
          policy_loss: -0.08806632061799367
          total_loss: -0.10409031994640827
          vf_explained_var: -0.29615840315818787
          vf_loss: 0.0006589406819936509
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_since_restore: 186
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,186,2090.17,186000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-09-20_08-35-17
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 187
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6842811465263368
          entropy_coeff: 0.009999999999999998
          kl: 0.011735283864466487
          policy_loss: -0.034306193060345117
          total_loss: -0.0503106243080563
          vf_explained_var: -0.0404602512717247
          vf_loss: 0.0008383807043881259
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_since_restore: 187
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,187,2099.31,187000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-09-20_08-35-26
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 188
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.8335965924792819
          entropy_coeff: 0.009999999999999998
          kl: 0.010174872020174518
          policy_loss: -0.04677462660604053
          total_loss: -0.06451612164576849
          vf_explained_var: -0.33553895354270935
          vf_loss: 0.0005944727420380028
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterations_since_restore: 188
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,188,2108.18,188000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-09-20_08-35-35
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 189
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9490810341305203
          entropy_coeff: 0.009999999999999998
          kl: 0.007056207957380699
          policy_loss: 0.011537795265515646
          total_loss: -0.007423992620574103
          vf_explained_var: 0.3393772840499878
          vf_loss: 0.0005290242477510927
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterations_since_restore: 189
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,189,2116.95,189000,-0.01,0,-1,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-09-20_08-35-44
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 190
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6935953352186415
          entropy_coeff: 0.009999999999999998
          kl: 0.006623536253318305
          policy_loss: -0.03833904792037275
          total_loss: -0.05402970069812404
          vf_explained_var: -0.9962718486785889
          vf_loss: 0.001245298679148416
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 190
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,190,2126.09,190000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-09-20_08-35-53
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 191
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.8303142348925272
          entropy_coeff: 0.009999999999999998
          kl: 0.016685979339269134
          policy_loss: -0.03619607144759761
          total_loss: -0.052929166621632044
          vf_explained_var: -0.34126976132392883
          vf_loss: 0.0015700453520467918
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  iterations_since_restore: 191
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,191,2135.16,191000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-09-20_08-36-02
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 192
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.42964106798172e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.502307724290424
          entropy_coeff: 0.009999999999999998
          kl: 0.0048305306444069475
          policy_loss: -0.021690463026364643
          total_loss: -0.03598155313067966
          vf_explained_var: -0.5106896758079529
          vf_loss: 0.000731987455381184
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 192
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,192,2144.43,192000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-09-20_08-36-11
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 193
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.7452350152863396
          entropy_coeff: 0.009999999999999998
          kl: 0.006355911212879937
          policy_loss: -0.018942816721068487
          total_loss: -0.03483642182416386
          vf_explained_var: -0.4916817843914032
          vf_loss: 0.001558745646616444
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  iterations_since_restore: 193
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,193,2153.42,193000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-09-20_08-36-20
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 194
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.7418097416559855
          entropy_coeff: 0.009999999999999998
          kl: 0.009173458863244516
          policy_loss: -0.0694458967488673
          total_loss: -0.08629562871323691
          vf_explained_var: -0.3612527549266815
          vf_loss: 0.000568366937457338
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterations_since_restore: 194
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,194,2162.54,194000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-09-20_08-36-30
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 195
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.4631439679198794
          entropy_coeff: 0.009999999999999998
          kl: 0.006025515311936916
          policy_loss: 0.06606821376416418
          total_loss: 0.052142609324720174
          vf_explained_var: -0.047484006732702255
          vf_loss: 0.0007058390328893438
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  iterations_since_restore: 195
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,195,2171.69,195000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-09-20_08-36-39
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 196
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.750146762530009
          entropy_coeff: 0.009999999999999998
          kl: 0.008246017619722067
          policy_loss: -0.027149413567450313
          total_loss: -0.043045986029836864
          vf_explained_var: 0.10662201046943665
          vf_loss: 0.0016048958812866154
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 196
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,196,2180.82,196000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-09-20_08-36-48
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 197
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.032422669728597
          entropy_coeff: 0.009999999999999998
          kl: 0.009014231226482237
          policy_loss: -0.004732698533270094
          total_loss: -0.023569897934794425
          vf_explained_var: -0.9626822471618652
          vf_loss: 0.0014870273349515627
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  iterations_since_restore: 197
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,197,2189.71,197000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-09-20_08-36-57
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 198
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9331730842590331
          entropy_coeff: 0.009999999999999998
          kl: 0.01895885131639723
          policy_loss: -0.07065070025208924
          total_loss: -0.08799697315941254
          vf_explained_var: -0.5155646204948425
          vf_loss: 0.001985458651809798
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iterations_since_restore: 198
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,198,2198.88,198000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-09-20_08-37-06
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 199
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.094277434878879
          entropy_coeff: 0.009999999999999998
          kl: 0.011805029237363998
          policy_loss: -0.032470125953356424
          total_loss: -0.0522883541468117
          vf_explained_var: -0.8474217653274536
          vf_loss: 0.0011245468755886475
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  iterations_since_restore: 199
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,199,2208.03,199000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-09-20_08-37-16
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 200
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.672680393854777
          entropy_coeff: 0.009999999999999998
          kl: 0.006272767425976748
          policy_loss: -0.07125072880751557
          total_loss: -0.08595100550187959
          vf_explained_var: -0.273054301738739
          vf_loss: 0.0020265270131252086
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 200
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,200,2218.04,200000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-09-20_08-37-26
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 201
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.4719526727994283
          entropy_coeff: 0.009999999999999998
          kl: 0.011739085157912202
          policy_loss: -0.06932069431576464
          total_loss: -0.08204991999599669
          vf_explained_var: -0.6370185613632202
          vf_loss: 0.001990299829049036
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  iterations_since_restore: 201
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,201,2228.2,201000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-09-20_08-37-37
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 202
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.71482053399086e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3153857469558716
          entropy_coeff: 0.009999999999999998
          kl: 0.004299972275234904
          policy_loss: 0.008980590291321278
          total_loss: -0.0035310647967788907
          vf_explained_var: -0.9119674563407898
          vf_loss: 0.0006422012963513326
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iterations_since_restore: 202
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,202,2238.54,202000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-09-20_08-37-46
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 203
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.7039101084073385
          entropy_coeff: 0.009999999999999998
          kl: 0.007959033012303211
          policy_loss: -0.016591800914870367
          total_loss: -0.030524822655651304
          vf_explained_var: -0.12489587813615799
          vf_loss: 0.003106077051618033
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iterations_since_restore: 203
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,203,2248.24,203000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-09-20_08-37-56
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 204
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.0990859296586777
          entropy_coeff: 0.009999999999999998
          kl: 0.009437312824042938
          policy_loss: -0.01578670262048642
          total_loss: -0.03540248307916853
          vf_explained_var: -0.2151176929473877
          vf_loss: 0.00137507768983293
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iterations_since_restore: 204
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,204,2257.49,204000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-09-20_08-38-05
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 205
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9966341349813672
          entropy_coeff: 0.009999999999999998
          kl: 0.00860298653897419
          policy_loss: 0.012926070474916035
          total_loss: -0.005290281689829297
          vf_explained_var: -0.8727582097053528
          vf_loss: 0.0017499904442552683
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iterations_since_restore: 205
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,205,2266.76,205000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-09-20_08-38-14
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 206
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.0438517491022745
          entropy_coeff: 0.009999999999999998
          kl: 0.013057337814518307
          policy_loss: -0.041487752232286665
          total_loss: -0.06105372909870413
          vf_explained_var: -1.0
          vf_loss: 0.0008725404577691936
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterations_since_restore: 206
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,206,2275.95,206000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-09-20_08-38-24
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 207
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.7757176107830472
          entropy_coeff: 0.009999999999999998
          kl: 0.007606227185448431
          policy_loss: -0.10941694660319222
          total_loss: -0.12440366836057769
          vf_explained_var: -0.8725439310073853
          vf_loss: 0.0027704540433155164
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  iterations_since_restore: 207
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,207,2285.52,207000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-09-20_08-38-34
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 208
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.739012316862742
          entropy_coeff: 0.009999999999999998
          kl: 0.006077049121660928
          policy_loss: -0.006134260528617435
          total_loss: -0.01852975876794921
          vf_explained_var: -0.5349135994911194
          vf_loss: 0.004994624974723492
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 208
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,208,2295.52,208000,-0.01,0,-1,995.74


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-09-20_08-38-44
  done: false
  episode_len_mean: 995.74
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 209
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.7524069918526544
          entropy_coeff: 0.009999999999999998
          kl: 0.008529864938479431
          policy_loss: 0.005798413811458482
          total_loss: -0.00826257032652696
          vf_explained_var: -0.6780812740325928
          vf_loss: 0.003463084936245448
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  iterations_since_restore: 209
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,209,2305.57,209000,-0.01,0,-1,995.74




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-09-20_08-39-12
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 210
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.7199339350064595
          entropy_coeff: 0.009999999999999998
          kl: 0.009458057210793076
          policy_loss: -0.07043784126225446
          total_loss: -0.0833537155141433
          vf_explained_var: -1.0
          vf_loss: 0.004283464141190052
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore: 210
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,210,2333.86,210000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-09-20_08-39-22
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 211
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.86865410539839
          entropy_coeff: 0.009999999999999998
          kl: 0.0095192220125447
          policy_loss: -0.028390164093838797
          total_loss: -0.04595774999923176
          vf_explained_var: -0.7889145016670227
          vf_loss: 0.0011189541333199789
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_since_restore: 211
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,211,2343.87,211000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-09-20_08-39-32
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 212
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6827954702907137
          entropy_coeff: 0.009999999999999998
          kl: 0.008147266709732016
          policy_loss: 0.11255771422551739
          total_loss: 0.09724707872503334
          vf_explained_var: -0.28156396746635437
          vf_loss: 0.001517318419387771
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  iterations_since_restore: 212
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,212,2354.21,212000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-09-20_08-39-43
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 213
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6208734936184352
          entropy_coeff: 0.009999999999999998
          kl: 0.00829815805707123
          policy_loss: -0.009500197548833158
          total_loss: -0.023068164185517364
          vf_explained_var: -0.5166606903076172
          vf_loss: 0.002640765393152833
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  iterations_since_restore: 213
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,213,2364.62,213000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-09-20_08-39-53
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 214
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.4898710674709743
          entropy_coeff: 0.009999999999999998
          kl: 0.006202156311016359
          policy_loss: -0.03298087923063172
          total_loss: -0.045982972201373845
          vf_explained_var: -0.8081682920455933
          vf_loss: 0.0018966192605956976
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  iterations_since_restore: 214
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,214,2375.01,214000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-09-20_08-40-04
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 215
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.4176749295658535
          entropy_coeff: 0.009999999999999998
          kl: 0.008150875840293163
          policy_loss: 0.047531225035587946
          total_loss: 0.03428490294350518
          vf_explained_var: -0.5455264449119568
          vf_loss: 0.0009304258778380851
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iterations_since_restore: 215
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,215,2385.52,215000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-09-20_08-40-14
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 216
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.8524711079067655
          entropy_coeff: 0.009999999999999998
          kl: 0.01284171719509912
          policy_loss: 0.007113679581218295
          total_loss: -0.008810105754269494
          vf_explained_var: -0.6507301330566406
          vf_loss: 0.002600926709257894
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterations_since_restore: 216
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,216,2395.83,216000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-09-20_08-40-25
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 217
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6527560008896722
          entropy_coeff: 0.009999999999999998
          kl: 0.008064838340319763
          policy_loss: 0.011986394433511629
          total_loss: -0.0027845049897829693
          vf_explained_var: -0.6979973316192627
          vf_loss: 0.0017566605923800833
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iterations_since_restore: 217
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,217,2406.15,217000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-09-20_08-40-35
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 218
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.551818323135376
          entropy_coeff: 0.009999999999999998
          kl: 0.011265581409404115
          policy_loss: 0.036117742334802944
          total_loss: 0.021969865759213766
          vf_explained_var: -0.7589706182479858
          vf_loss: 0.0013703084492590278
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  iterations_since_restore: 218
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,218,2416.49,218000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-09-20_08-40-45
  done: false
  episode_len_mean: 994.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 219
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.5283514936765035
          entropy_coeff: 0.009999999999999998
          kl: 0.007213799559780851
          policy_loss: 0.028621351718902587
          total_loss: 0.014414660301473406
          vf_explained_var: -0.621243953704834
          vf_loss: 0.0010768229458739775
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iterations_since_restore: 219
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,219,2426.86,219000,-0.01,0,-1,994.37


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-09-20_08-40-56
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 220
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.35741026699543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.7901013082928128
          entropy_coeff: 0.009999999999999998
          kl: 0.043336969621260595
          policy_loss: -0.08089988314443164
          total_loss: -0.09497737818294101
          vf_explained_var: 0.05714292451739311
          vf_loss: 0.0038235192575181523
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 220
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,220,2437.31,220000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-09-20_08-41-05
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 221
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.276676160759396
          entropy_coeff: 0.009999999999999998
          kl: 0.01016851611061769
          policy_loss: 0.04005490725653039
          total_loss: 0.018179547062350643
          vf_explained_var: -0.639802873134613
          vf_loss: 0.0008913997039295888
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  iterations_since_restore: 221
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,221,2446.57,221000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-09-20_08-41-15
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 222
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.0157647172609967
          entropy_coeff: 0.009999999999999998
          kl: 0.010288753515158927
          policy_loss: -0.00022315233945846558
          total_loss: -0.01811146347059144
          vf_explained_var: -0.8558865189552307
          vf_loss: 0.0022693363690955773
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iterations_since_restore: 222
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,222,2456.4,222000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-09-20_08-41-24
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 223
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.2530080344941883
          entropy_coeff: 0.009999999999999998
          kl: 0.0108097874689506
          policy_loss: 0.03349294281668133
          total_loss: 0.01131325935324033
          vf_explained_var: -0.24042575061321259
          vf_loss: 0.00035039603119205116
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  iterations_since_restore: 223
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,223,2465.37,223000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-09-20_08-41-34
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 224
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.043202969763014
          entropy_coeff: 0.009999999999999998
          kl: 0.012194136847496
          policy_loss: -0.021356350431839626
          total_loss: -0.04028815080722173
          vf_explained_var: -0.9671543836593628
          vf_loss: 0.0015002321251409334
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 224
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,224,2475.16,224000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-09-20_08-41-43
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 225
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.2773196591271296
          entropy_coeff: 0.009999999999999998
          kl: 0.00992689444259543
          policy_loss: -0.03281853770216306
          total_loss: -0.0550518607099851
          vf_explained_var: -0.36772027611732483
          vf_loss: 0.0005398740374958935
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterations_since_restore: 225
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,225,2484.01,225000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-09-20_08-41-52
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 226
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9376076009538439
          entropy_coeff: 0.009999999999999998
          kl: 0.010310395248565543
          policy_loss: -0.007476647881170114
          total_loss: -0.025856616492900584
          vf_explained_var: -0.5551285743713379
          vf_loss: 0.0009961051982827485
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations_since_restore: 226
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,226,2493.52,226000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-09-20_08-42-01
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 227
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.1817901876237658
          entropy_coeff: 0.009999999999999998
          kl: 0.010636047074657482
          policy_loss: -0.01523798317131069
          total_loss: -0.03594363973372512
          vf_explained_var: -0.4592134654521942
          vf_loss: 0.0011122453097211998
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterations_since_restore: 227
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,227,2502.82,227000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-09-20_08-42-11
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 228
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9410293089018928
          entropy_coeff: 0.009999999999999998
          kl: 0.013691920207669522
          policy_loss: -0.04506187149220043
          total_loss: -0.0634595814678404
          vf_explained_var: -0.37869957089424133
          vf_loss: 0.0010125837440783571
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iterations_since_restore: 228
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,228,2512.33,228000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-09-20_08-42-20
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 229
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.272096844514211
          entropy_coeff: 0.009999999999999998
          kl: 0.012375246534125889
          policy_loss: -0.025429291878309515
          total_loss: -0.04783301163050863
          vf_explained_var: -0.6569929122924805
          vf_loss: 0.0003172502536067946
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations_since_restore: 229
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,229,2521.35,229000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-09-20_08-42-29
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 230
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.1718305985132855
          entropy_coeff: 0.009999999999999998
          kl: 0.008042288894903047
          policy_loss: -0.01310912279619111
          total_loss: -0.033598194354110295
          vf_explained_var: -0.8047248721122742
          vf_loss: 0.0012292357980590572
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations_since_restore: 230
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,230,2530.67,230000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-09-20_08-42-39
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 231
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.248565453953213
          entropy_coeff: 0.009999999999999998
          kl: 0.01145060140454343
          policy_loss: -0.04052614242666298
          total_loss: -0.061933975997898315
          vf_explained_var: -0.0205323938280344
          vf_loss: 0.001077820095674219
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iterations_since_restore: 231
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,231,2539.94,231000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-09-20_08-42-49
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 232
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.0988155047098798
          entropy_coeff: 0.009999999999999998
          kl: 0.012978029247075046
          policy_loss: -0.0028877574536535474
          total_loss: -0.022363094343907302
          vf_explained_var: -0.30409228801727295
          vf_loss: 0.0015128192155518467
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations_since_restore: 232
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,232,2550.4,232000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-09-20_08-43-00
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 233
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.0568087153964574
          entropy_coeff: 0.009999999999999998
          kl: 0.018956224026935774
          policy_loss: -0.013946632461415396
          total_loss: -0.03235093884997898
          vf_explained_var: -1.0
          vf_loss: 0.0021637799554607936
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  iterations_since_restore: 233
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,233,2560.94,233000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-09-20_08-43-10
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 234
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.107780109511481
          entropy_coeff: 0.009999999999999998
          kl: 0.011992258345314453
          policy_loss: -0.008814310199684566
          total_loss: -0.028243041535218557
          vf_explained_var: -0.9847910404205322
          vf_loss: 0.0016490675625391304
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  iterations_since_restore: 234
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,234,2571.04,234000,-0.01,0,-1,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-09-20_08-43-20
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 235
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9817204488648308
          entropy_coeff: 0.009999999999999998
          kl: 0.013437970463033563
          policy_loss: -0.15156706124544145
          total_loss: -0.16940622842974132
          vf_explained_var: -0.6857007145881653
          vf_loss: 0.0019780359864752326
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  iterations_since_restore: 235
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,235,2581.51,235000,0,0,0,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-09-20_08-43-31
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 236
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6909074223703808
          entropy_coeff: 0.009999999999999998
          kl: 0.005450054605507611
          policy_loss: -0.11543304233087434
          total_loss: -0.12039515901770857
          vf_explained_var: 0.07684057205915451
          vf_loss: 0.001946956453482724
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  iterations_since_restore: 236
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,236,2592.23,236000,0,0,0,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-09-20_08-43-41
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 237
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9331088052855598
          entropy_coeff: 0.009999999999999998
          kl: 0.01408040941560093
          policy_loss: -0.08577337997655074
          total_loss: -0.10251738876104355
          vf_explained_var: -0.25969669222831726
          vf_loss: 0.0025870780269744704
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  iterations_since_restore: 237
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,237,2601.9,237000,0,0,0,995.82


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-09-20_08-43-51
  done: false
  episode_len_mean: 995.82
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 238
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9474683178795709
          entropy_coeff: 0.009999999999999998
          kl: 0.010175479326364763
          policy_loss: -0.004886252101924685
          total_loss: -0.02222894210782316
          vf_explained_var: -1.0
          vf_loss: 0.0021319950891969105
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
  iterations_since_restore: 238
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,238,2611.91,238000,0,0,0,995.82




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-09-20_08-44-18
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 240
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5361154004931466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2247473471694523
          entropy_coeff: 0.009999999999999998
          kl: 0.02604151575871367
          policy_loss: -0.10044373737441169
          total_loss: -0.10419283757607142
          vf_explained_var: 0.22659869492053986
          vf_loss: 0.008498371808996631
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  iterations_since_restore: 239
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,239,2639.53,239000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-09-20_08-44-29
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 241
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.304173100739716e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.2603327406777276
          entropy_coeff: 0.009999999999999998
          kl: 0.011750379562353983
          policy_loss: -0.04903743742033839
          total_loss: -0.07083116628022658
          vf_explained_var: -0.5758861899375916
          vf_loss: 0.00080959889920046
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 240
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,240,2650.34,240000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-09-20_08-44-40
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 242
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.304173100739716e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9019451459248861
          entropy_coeff: 0.009999999999999998
          kl: 0.007055099535345417
          policy_loss: 0.0689423634774155
          total_loss: 0.06352142608828015
          vf_explained_var: 0.11960338801145554
          vf_loss: 0.003598514256171054
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  iterations_since_restore: 241
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,241,2660.55,241000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-09-20_08-44-50
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.02
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 243
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.304173100739716e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0265649603472815
          entropy_coeff: 0.009999999999999998
          kl: 0.01404587911385325
          policy_loss: -0.022399708752830823
          total_loss: 0.0035401361684004465
          vf_explained_var: 0.029405275359749794
          vf_loss: 0.036205493989917965
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iterations_since_restore: 242
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,242,2670.85,242000,0.02,2,0,994.48




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-09-20_08-45-23
  done: false
  episode_len_mean: 993.53
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 244
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.304173100739716e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3769606590270995
          entropy_coeff: 0.009999999999999998
          kl: 0.02853776524557509
          policy_loss: 0.14594898207320106
          total_loss: 0.20952470401922862
          vf_explained_var: -0.3451897203922272
          vf_loss: 0.07734532931095196
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  iterations_since_restore: 243
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,243,2704.4,243000,0.06,4,0,993.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-09-20_08-45-35
  done: false
  episode_len_mean: 993.53
  episode_media: {}
  episode_reward_max: 4.0
  episode_reward_mean: 0.07
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 245
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.956259651109576e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.4335781123903062
          entropy_coeff: 0.009999999999999998
          kl: 0.01767907067493061
          policy_loss: 0.015790208594666587
          total_loss: 0.026520576866136656
          vf_explained_var: -0.29252174496650696
          vf_loss: 0.025066150036743947
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  iterations_since_restore: 244
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,244,2716.45,244000,0.07,4,0,993.53




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-09-20_08-46-21
  done: false
  episode_len_mean: 988.36
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.13
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 246
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.956259651109576e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6375966919793024
          entropy_coeff: 0.009999999999999998
          kl: 0.013193082553850388
          policy_loss: -0.13412715399430858
          total_loss: 0.06866510692569945
          vf_explained_var: 0.2012467086315155
          vf_loss: 0.21916822493076324
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
  iterations_since_restore: 245
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,245,2761.57,245000,0.13,6,0,988.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-09-20_08-46-32
  done: false
  episode_len_mean: 988.36
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.13
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 247
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.956259651109576e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9773715747727287
          entropy_coeff: 0.009999999999999998
          kl: 0.02108768091977274
          policy_loss: 0.12334434621863895
          total_loss: 0.10906514740652508
          vf_explained_var: -0.2981175482273102
          vf_loss: 0.005494525282928306
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  iterations_since_restore: 246
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,246,2773.36,246000,0.13,6,0,988.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-09-20_08-46-43
  done: false
  episode_len_mean: 988.36
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.13
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 248
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1934389476664367e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.663491419951121
          entropy_coeff: 0.009999999999999998
          kl: 0.012805698994560435
          policy_loss: -0.10184545053376091
          total_loss: -0.05627917477654086
          vf_explained_var: 0.2690364718437195
          vf_loss: 0.06220119203854766
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
  iterations_since_restore: 247
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,247,2783.76,247000,0.13,6,0,988.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-09-20_08-46-52
  done: false
  episode_len_mean: 988.36
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 249
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1934389476664367e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.5640294932656817
          entropy_coeff: 0.009999999999999998
          kl: 0.029312027479855857
          policy_loss: -0.03411877482301659
          total_loss: -0.04069791759053866
          vf_explained_var: -0.7553772926330566
          vf_loss: 0.009061152741196565
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  iterations_since_restore: 248
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,248,2793.07,248000,0.14,6,0,988.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-09-20_08-47-02
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 250
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2733052915996974
          entropy_coeff: 0.009999999999999998
          kl: 0.013649687988103251
          policy_loss: 0.02321485471394327
          total_loss: 0.0037688830660449132
          vf_explained_var: -0.6538790464401245
          vf_loss: 0.003287084162972557
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  iterations_since_restore: 249
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,249,2802.64,249000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-09-20_08-47-10
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 251
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4012120167414346
          entropy_coeff: 0.009999999999999998
          kl: 0.013340791401880934
          policy_loss: 0.018806481226864787
          total_loss: -0.0038687469230757818
          vf_explained_var: -0.2620822787284851
          vf_loss: 0.0013368904313311861
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations_since_restore: 250
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,250,2811.22,250000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-09-20_08-47-19
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 252
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.371502078904046
          entropy_coeff: 0.009999999999999998
          kl: 0.011470526822001325
          policy_loss: 0.02950486164126131
          total_loss: 0.006639244821336534
          vf_explained_var: -0.845244824886322
          vf_loss: 0.0008494051651117237
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
  iterations_since_restore: 251
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,251,2819.86,251000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-09-20_08-47-28
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 253
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4022570927937825
          entropy_coeff: 0.009999999999999998
          kl: 0.011198402979900221
          policy_loss: 0.036891376972198485
          total_loss: 0.01352921658092075
          vf_explained_var: -0.6992425322532654
          vf_loss: 0.0006604085731345954
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  iterations_since_restore: 252
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,252,2828.54,252000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-09-20_08-47-36
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 254
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4226600938373144
          entropy_coeff: 0.009999999999999998
          kl: 0.010361993852837594
          policy_loss: 0.012375348971949683
          total_loss: -0.011336577435334524
          vf_explained_var: -0.22357600927352905
          vf_loss: 0.0005146734536336024
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
  iterations_since_restore: 253
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,253,2837.19,253000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-09-20_08-47-45
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 255
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5016263537936743
          entropy_coeff: 0.009999999999999998
          kl: 0.009493969702185644
          policy_loss: -0.009025418841176563
          total_loss: -0.03355965912342072
          vf_explained_var: -0.9498552083969116
          vf_loss: 0.0004820267473809913
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
  iterations_since_restore: 254
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,254,2845.83,254000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-09-20_08-47-54
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 256
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.476514373885261
          entropy_coeff: 0.009999999999999998
          kl: 0.013117079169570268
          policy_loss: 0.006207507890131738
          total_loss: -0.01785386653823985
          vf_explained_var: -0.42324298620224
          vf_loss: 0.0007037716386548709
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  iterations_since_restore: 255
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,255,2854.5,255000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-09-20_08-48-02
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 257
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.545686059527927
          entropy_coeff: 0.009999999999999998
          kl: 0.00916759736339464
          policy_loss: -0.006138137893544303
          total_loss: -0.031242879976828893
          vf_explained_var: -0.9865363836288452
          vf_loss: 0.0003521169318547537
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  iterations_since_restore: 256
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,256,2863.1,256000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-09-20_08-48-11
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 258
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.57900570763482
          entropy_coeff: 0.009999999999999998
          kl: 0.010323200185181215
          policy_loss: 0.004179228842258453
          total_loss: -0.02099077320761151
          vf_explained_var: -0.8319833278656006
          vf_loss: 0.000620053486474919
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
  iterations_since_restore: 257
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,257,2871.83,257000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-09-20_08-48-20
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 259
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5351978222529095
          entropy_coeff: 0.009999999999999998
          kl: 0.008653074561832398
          policy_loss: -0.005164514978726705
          total_loss: -0.02983258060283131
          vf_explained_var: -0.5605766773223877
          vf_loss: 0.0006839136837192604
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000
  iterations_since_restore: 258
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,258,2880.77,258000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-09-20_08-48-29
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 260
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5656768321990966
          entropy_coeff: 0.009999999999999998
          kl: 0.0073712006362508
          policy_loss: -0.013522982845703762
          total_loss: -0.03862222383419673
          vf_explained_var: -0.715560257434845
          vf_loss: 0.0005575298351686797
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000
  iterations_since_restore: 259
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,259,2889.66,259000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-09-20_08-48-38
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 261
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.790158421499654e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5950882964664035
          entropy_coeff: 0.009999999999999998
          kl: 0.003458307062311958
          policy_loss: -0.022004890359110303
          total_loss: -0.047575382391611735
          vf_explained_var: -1.0
          vf_loss: 0.0003803900155617157
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 260
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,260,2898.48,260000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-09-20_08-48-47
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 262
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.569267990854051
          entropy_coeff: 0.009999999999999998
          kl: 0.008409666927043504
          policy_loss: 0.03658289853483439
          total_loss: 0.011425421738790142
          vf_explained_var: -0.9977015852928162
          vf_loss: 0.0005352074314537782
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000
  iterations_since_restore: 261
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,261,2907.36,261000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-09-20_08-48-56
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 263
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.5128849267959597
          entropy_coeff: 0.009999999999999998
          kl: 0.010090617284055152
          policy_loss: -0.011992115692959892
          total_loss: -0.036706556090050274
          vf_explained_var: -0.8910685181617737
          vf_loss: 0.0004144091646594461
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000
  iterations_since_restore: 262
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,262,2916.31,262000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-09-20_08-49-05
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 264
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.505356211132473
          entropy_coeff: 0.009999999999999998
          kl: 0.01126405727096148
          policy_loss: 0.003148343786597252
          total_loss: -0.021651703988512357
          vf_explained_var: 0.003527984954416752
          vf_loss: 0.0002535111821291341
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000
  iterations_since_restore: 263
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,263,2925.12,263000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-09-20_08-49-13
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 265
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.5383474614885118
          entropy_coeff: 0.009999999999999998
          kl: 0.011363145877560162
          policy_loss: -0.015119804804109864
          total_loss: -0.03982894791083203
          vf_explained_var: -0.9035295248031616
          vf_loss: 0.0006743323497681154
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  iterations_since_restore: 264
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,264,2933.85,264000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-09-20_08-49-22
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 266
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.4547805786132812
          entropy_coeff: 0.009999999999999998
          kl: 0.007696310543571173
          policy_loss: -0.052429661403099695
          total_loss: -0.07651546866529518
          vf_explained_var: -0.9054166078567505
          vf_loss: 0.00046199906579810583
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
  iterations_since_restore: 265
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,265,2942.64,265000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-09-20_08-49-31
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 267
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.5033901664945812
          entropy_coeff: 0.009999999999999998
          kl: 0.01144971664084693
          policy_loss: -0.038045931938621734
          total_loss: -0.06255570161673758
          vf_explained_var: -0.9719310402870178
          vf_loss: 0.0005241341449921795
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
  iterations_since_restore: 266
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,266,2951.29,266000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-09-20_08-49-40
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 268
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.288235815366109
          entropy_coeff: 0.009999999999999998
          kl: 0.006955607540415482
          policy_loss: -0.03473443382730087
          total_loss: -0.056039095752769046
          vf_explained_var: -0.872223973274231
          vf_loss: 0.0015776952562898967
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000
  iterations_since_restore: 267
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,267,2960.37,267000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-09-20_08-49-49
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 269
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.489614258872138
          entropy_coeff: 0.009999999999999998
          kl: 0.01026463640270955
          policy_loss: -0.039276812598109244
          total_loss: -0.06365241048236688
          vf_explained_var: -0.9996179342269897
          vf_loss: 0.0005205451499528459
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  iterations_since_restore: 268
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,268,2969.16,268000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-09-20_08-49-58
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 270
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.4398334397210015
          entropy_coeff: 0.009999999999999998
          kl: 0.009267284950534426
          policy_loss: -0.04380449144066208
          total_loss: -0.06756750889536407
          vf_explained_var: -0.9102381467819214
          vf_loss: 0.000635316569565071
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000
  iterations_since_restore: 269
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,269,2978.25,269000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-09-20_08-50-07
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 271
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.4067115677727595
          entropy_coeff: 0.009999999999999998
          kl: 0.011135861013576331
          policy_loss: -0.04857858991664317
          total_loss: -0.0712165433085627
          vf_explained_var: -1.0
          vf_loss: 0.0014291573088687276
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  iterations_since_restore: 270
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,270,2987.61,270000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-09-20_08-50-17
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 272
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.4015033774905734
          entropy_coeff: 0.009999999999999998
          kl: 0.012568986606382953
          policy_loss: -0.024713923575149642
          total_loss: -0.047548557010789715
          vf_explained_var: -0.31914734840393066
          vf_loss: 0.0011803984162624046
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  iterations_since_restore: 271
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,271,2997.13,271000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-09-20_08-50-26
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 273
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.4724652237362332
          entropy_coeff: 0.009999999999999998
          kl: 0.011467343303082187
          policy_loss: -0.021688125220437844
          total_loss: -0.04585779913597637
          vf_explained_var: -1.0
          vf_loss: 0.0005549781826428241
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  iterations_since_restore: 272
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,272,3006.38,272000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-09-20_08-50-35
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 274
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.4438891145918102
          entropy_coeff: 0.009999999999999998
          kl: 0.0083797400299289
          policy_loss: -0.00839735513759984
          total_loss: -0.03237607147958543
          vf_explained_var: -0.8108252882957458
          vf_loss: 0.00046017364652976134
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
  iterations_since_restore: 273
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,273,3015.47,273000,0.14,6,0,989.72


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-09-20_08-50-45
  done: false
  episode_len_mean: 989.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 275
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.379258394241333
          entropy_coeff: 0.009999999999999998
          kl: 0.013735404813741229
          policy_loss: -0.060682261983553566
          total_loss: -0.08390228417184618
          vf_explained_var: -0.8417788743972778
          vf_loss: 0.0005725599860953581
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  iterations_since_restore: 274
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,274,3024.78,274000,0.14,6,0,989.72




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-09-20_08-51-11
  done: false
  episode_len_mean: 988.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 276
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.4243655602137246
          entropy_coeff: 0.009999999999999998
          kl: 0.013180816994280904
          policy_loss: -0.042856953417261444
          total_loss: -0.06669919614990552
          vf_explained_var: -0.9810364842414856
          vf_loss: 0.00040140939058296173
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
  iterations_since_restore: 275
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,275,3051.07,275000,0.14,6,0,988.35


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-09-20_08-51-22
  done: false
  episode_len_mean: 988.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 277
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.389793615871006
          entropy_coeff: 0.009999999999999998
          kl: 0.007846828824718738
          policy_loss: 0.0069372158290611375
          total_loss: -0.01651802584528923
          vf_explained_var: -0.7634245753288269
          vf_loss: 0.00044269308960388624
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
  iterations_since_restore: 276
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,276,3062.18,276000,0.14,6,0,988.35


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-09-20_08-51-31
  done: false
  episode_len_mean: 988.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 278
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.394805113474528
          entropy_coeff: 0.009999999999999998
          kl: 0.011357976945811761
          policy_loss: -0.021073949937191274
          total_loss: -0.04431611365742154
          vf_explained_var: -1.0
          vf_loss: 0.0007058830794347968
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
  iterations_since_restore: 277
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,277,3071.27,277000,0.14,6,0,988.35


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-09-20_08-51-40
  done: false
  episode_len_mean: 988.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 279
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.441463494300842
          entropy_coeff: 0.009999999999999998
          kl: 0.013196323496470362
          policy_loss: -0.03249064853621854
          total_loss: -0.05649081841111183
          vf_explained_var: -1.0
          vf_loss: 0.0004144657933567133
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000
  iterations_since_restore: 278
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,278,3080.48,278000,0.14,6,0,988.35


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-09-20_08-51-50
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 280
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.3820389959547255
          entropy_coeff: 0.009999999999999998
          kl: 0.00961921489728979
          policy_loss: -0.0252036412143045
          total_loss: -0.04832779344999128
          vf_explained_var: -0.8184988498687744
          vf_loss: 0.0006962378431732456
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
  iterations_since_restore: 279
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,279,3089.99,279000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-09-20_08-51-59
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 281
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.375773859024048
          entropy_coeff: 0.009999999999999998
          kl: 0.01246724081133325
          policy_loss: -0.018945489823818207
          total_loss: -0.04109142331613435
          vf_explained_var: -0.49919426441192627
          vf_loss: 0.0016118064244639956
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 280
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,280,3099.49,280000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-09-20_08-52-09
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 282
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.3522069374720256
          entropy_coeff: 0.009999999999999998
          kl: 0.008684300793959841
          policy_loss: -0.007122672783831756
          total_loss: -0.029692257485455936
          vf_explained_var: -0.9571011066436768
          vf_loss: 0.0009524821328038039
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
  iterations_since_restore: 281
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,281,3109.02,281000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-09-20_08-52-18
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 283
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.3680029736624824
          entropy_coeff: 0.009999999999999998
          kl: 0.015012349820034071
          policy_loss: 0.00826812916331821
          total_loss: -0.014295448569787874
          vf_explained_var: -0.6485196352005005
          vf_loss: 0.0011164500055504808
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000
  iterations_since_restore: 282
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,282,3118.37,282000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-09-20_08-52-28
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 284
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.324253545867072
          entropy_coeff: 0.009999999999999998
          kl: 0.011510727758047344
          policy_loss: -0.042411960164705914
          total_loss: -0.06473205466237333
          vf_explained_var: -1.0
          vf_loss: 0.0009224398200684744
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  iterations_since_restore: 283
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,283,3128.04,283000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-09-20_08-52-38
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 285
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.3313339286380343
          entropy_coeff: 0.009999999999999998
          kl: 0.010267656344616407
          policy_loss: -0.02704360294673178
          total_loss: -0.04962539548675219
          vf_explained_var: -0.9915992617607117
          vf_loss: 0.0007315458306240746
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  iterations_since_restore: 284
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,284,3137.66,284000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-09-20_08-52-47
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 286
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.3104964097340903
          entropy_coeff: 0.009999999999999998
          kl: 0.010360600897727343
          policy_loss: -0.00651000551879406
          total_loss: -0.02888220945994059
          vf_explained_var: -1.0
          vf_loss: 0.0007327606384125021
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  iterations_since_restore: 285
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,285,3147.21,285000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-09-20_08-52-57
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 287
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.3343385457992554
          entropy_coeff: 0.009999999999999998
          kl: 0.011350833382286889
          policy_loss: -0.017053806864553028
          total_loss: -0.03965656090941694
          vf_explained_var: -0.8305292725563049
          vf_loss: 0.0007406311719225616
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  iterations_since_restore: 286
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,286,3156.85,286000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-09-20_08-53-07
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 288
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.2909776634640164
          entropy_coeff: 0.009999999999999998
          kl: 0.01691257912777616
          policy_loss: 0.0024274758994579314
          total_loss: -0.019734515912002988
          vf_explained_var: -0.8561962842941284
          vf_loss: 0.0007477839131348042
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000
  iterations_since_restore: 287
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,287,3166.48,287000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-09-20_08-53-16
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 289
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.356933681170146
          entropy_coeff: 0.009999999999999998
          kl: 0.013607771904375622
          policy_loss: -0.011292488076206711
          total_loss: -0.03373530196646849
          vf_explained_var: -1.0
          vf_loss: 0.0011265223355925022
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000
  iterations_since_restore: 288
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,288,3176.19,288000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-09-20_08-53-26
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 290
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.1581618110338847
          entropy_coeff: 0.009999999999999998
          kl: 0.011891925113392255
          policy_loss: -0.02463793522781796
          total_loss: -0.044849153152770466
          vf_explained_var: -0.9280676245689392
          vf_loss: 0.0013704000933406253
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000
  iterations_since_restore: 289
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,289,3186.12,289000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-09-20_08-53-36
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 291
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.1029404679934185
          entropy_coeff: 0.009999999999999998
          kl: 0.015746687919883425
          policy_loss: -0.03193108526368936
          total_loss: -0.05174879026081827
          vf_explained_var: -1.0
          vf_loss: 0.0012116990343201905
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  iterations_since_restore: 290
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,290,3196.1,290000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-09-20_08-53-46
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 292
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.1527323034074572
          entropy_coeff: 0.009999999999999998
          kl: 0.009008502349817881
          policy_loss: -0.055260285569561855
          total_loss: -0.07546349014672968
          vf_explained_var: -0.20174843072891235
          vf_loss: 0.0013241143961850968
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
  iterations_since_restore: 291
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,291,3206.23,291000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-09-20_08-53-56
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 293
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.2632333199183146
          entropy_coeff: 0.009999999999999998
          kl: 0.014458344182126718
          policy_loss: -0.026834586076438426
          total_loss: -0.04862603356854783
          vf_explained_var: -1.0
          vf_loss: 0.0008408841554127219
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  iterations_since_restore: 292
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,292,3216.19,292000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-09-20_08-54-07
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 294
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.2323268996344674
          entropy_coeff: 0.009999999999999998
          kl: 0.01413868073046483
          policy_loss: -0.015069796558883455
          total_loss: -0.036065067764785554
          vf_explained_var: -0.9930346012115479
          vf_loss: 0.0013279940008134063
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  iterations_since_restore: 293
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,293,3226.36,293000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-09-20_08-54-17
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 295
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.2354129791259765
          entropy_coeff: 0.009999999999999998
          kl: 0.011268117600415826
          policy_loss: -0.01809670709901386
          total_loss: -0.03977206870913506
          vf_explained_var: -0.8475673198699951
          vf_loss: 0.0006787663770309235
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000
  iterations_since_restore: 294
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,294,3236.3,294000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-09-20_08-54-27
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 296
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.182122270266215
          entropy_coeff: 0.009999999999999998
          kl: 0.009806161757987139
          policy_loss: 0.014514822285208437
          total_loss: -0.006204095213777489
          vf_explained_var: -1.0
          vf_loss: 0.0011023044599116677
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000
  iterations_since_restore: 295
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,295,3246.31,295000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-09-20_08-54-37
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 297
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.098398049672445
          entropy_coeff: 0.009999999999999998
          kl: 0.014606004186640062
          policy_loss: -0.005391879503925641
          total_loss: -0.025152156295047867
          vf_explained_var: -0.6692209243774414
          vf_loss: 0.0012237035417961629
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  iterations_since_restore: 296
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,296,3256.22,296000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-09-20_08-54-47
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 298
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.002440477742089
          entropy_coeff: 0.009999999999999998
          kl: 0.01073700930420082
          policy_loss: -0.030078403527537982
          total_loss: -0.04831727362341351
          vf_explained_var: -1.0
          vf_loss: 0.0017855318787042052
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000
  iterations_since_restore: 297
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,297,3266.5,297000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-09-20_08-54-57
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 299
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9397928661770292
          entropy_coeff: 0.009999999999999998
          kl: 0.011174495830275294
          policy_loss: -0.056919901818037036
          total_loss: -0.07454494097166592
          vf_explained_var: -0.7125994563102722
          vf_loss: 0.001772889095850082
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000
  iterations_since_restore: 298
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,298,3276.81,298000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-09-20_08-55-07
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 300
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.102639881769816
          entropy_coeff: 0.009999999999999998
          kl: 0.009247507444929168
          policy_loss: -0.05681966236895985
          total_loss: -0.07514926981594827
          vf_explained_var: -0.48798954486846924
          vf_loss: 0.0026967923098709435
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
  iterations_since_restore: 299
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,299,3287.04,299000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-09-20_08-55-18
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 301
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.0985743602116904
          entropy_coeff: 0.009999999999999998
          kl: 0.013381520267875166
          policy_loss: -0.013504946024881469
          total_loss: -0.03207725433425771
          vf_explained_var: -0.6296753287315369
          vf_loss: 0.002413432545856469
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 300
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,300,3297.33,300000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-09-20_08-55-28
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 302
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.1433238082461887
          entropy_coeff: 0.009999999999999998
          kl: 0.014248006860677146
          policy_loss: -0.021517375194364124
          total_loss: -0.04095552522275183
          vf_explained_var: -0.6097330451011658
          vf_loss: 0.001995087103730637
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000
  iterations_since_restore: 301
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,301,3307.41,301000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-09-20_08-55-38
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 303
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.204092110527886
          entropy_coeff: 0.009999999999999998
          kl: 0.011599038636277273
          policy_loss: 0.005565102977885141
          total_loss: -0.015368674157394303
          vf_explained_var: -0.5321621298789978
          vf_loss: 0.0011071437478272451
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  iterations_since_restore: 302
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,302,3317.35,302000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-09-20_08-55-48
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 304
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.232060522503323
          entropy_coeff: 0.009999999999999998
          kl: 0.015532330189856757
          policy_loss: 0.00504051542116536
          total_loss: -0.016604626551270485
          vf_explained_var: -1.0
          vf_loss: 0.000675463135768142
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  iterations_since_restore: 303
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,303,3327.44,303000,0.14,6,0,989.8


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-09-20_08-55-58
  done: false
  episode_len_mean: 989.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 305
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.249314702881707
          entropy_coeff: 0.009999999999999998
          kl: 0.013337176037753433
          policy_loss: -0.11275712876684135
          total_loss: -0.13467513442867332
          vf_explained_var: -0.8719691634178162
          vf_loss: 0.0005751420299121593
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  iterations_since_restore: 304
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,304,3337.32,304000,0.14,6,0,989.8




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-09-20_08-56-25
  done: false
  episode_len_mean: 988.41
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 306
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.95079210749827e-10
          cur_lr: 5.000000000000001e-05
          entropy: 2.026560029718611
          entropy_coeff: 0.009999999999999998
          kl: 0.020882477861461052
          policy_loss: -0.02204453961716758
          total_loss: -0.04071380934781498
          vf_explained_var: -1.0
          vf_loss: 0.0015963306378883621
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
  iterations_since_restore: 305
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,305,3364.65,305000,0.14,6,0,988.41


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-09-20_08-56-35
  done: false
  episode_len_mean: 988.41
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 307
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3426188161247413e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.062704602877299
          entropy_coeff: 0.009999999999999998
          kl: 0.007512419996558448
          policy_loss: -0.06508183860116534
          total_loss: -0.08458720975452
          vf_explained_var: -1.0
          vf_loss: 0.0011216745149188986
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  iterations_since_restore: 306
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,306,3374.75,306000,0.14,6,0,988.41


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-09-20_08-56-45
  done: false
  episode_len_mean: 988.41
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 308
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3426188161247413e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2280228005515204
          entropy_coeff: 0.009999999999999998
          kl: 0.01633597888107576
          policy_loss: -0.03590422154714664
          total_loss: -0.05721833058115509
          vf_explained_var: -0.9997650384902954
          vf_loss: 0.0009661203466950813
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  iterations_since_restore: 307
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,307,3384.36,307000,0.14,6,0,988.41


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-09-20_08-56-55
  done: false
  episode_len_mean: 988.41
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 309
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3426188161247413e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0457485874493915
          entropy_coeff: 0.009999999999999998
          kl: 0.01547536355771339
          policy_loss: -0.09048487684792943
          total_loss: -0.10775295462873247
          vf_explained_var: -0.11402934044599533
          vf_loss: 0.0031894069371951952
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
  iterations_since_restore: 308
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,308,3394.6,308000,0.14,6,0,988.41


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-09-20_08-57-06
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 310
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3426188161247413e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0913800345526803
          entropy_coeff: 0.009999999999999998
          kl: 0.011223826575213794
          policy_loss: -0.07493143880532847
          total_loss: -0.09472754407260153
          vf_explained_var: -0.6622482538223267
          vf_loss: 0.001117692325108995
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
  iterations_since_restore: 309
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,309,3405.06,309000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-09-20_08-57-16
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 311
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3426188161247413e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0436989068984985
          entropy_coeff: 0.009999999999999998
          kl: 0.011914767027591585
          policy_loss: -0.04801127033101188
          total_loss: -0.0673986677494314
          vf_explained_var: -0.8374130129814148
          vf_loss: 0.0010495891128407997
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  iterations_since_restore: 310
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,310,3415.52,310000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-09-20_08-57-26
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 312
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3426188161247413e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.117295103602939
          entropy_coeff: 0.009999999999999998
          kl: 0.019949126852692543
          policy_loss: -0.05527011396156417
          total_loss: -0.0752996675670147
          vf_explained_var: -1.0
          vf_loss: 0.001143396084403826
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
  iterations_since_restore: 311
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,311,3425.61,311000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-09-20_08-57-37
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 313
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3426188161247413e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0683085759480795
          entropy_coeff: 0.009999999999999998
          kl: 0.010633903359365742
          policy_loss: -0.0433898346291648
          total_loss: -0.06305102759765255
          vf_explained_var: -0.8942933678627014
          vf_loss: 0.001021892023143462
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  iterations_since_restore: 312
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,312,3435.91,312000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-09-20_08-57-47
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 314
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3426188161247413e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.187079350153605
          entropy_coeff: 0.009999999999999998
          kl: 0.02086976355099157
          policy_loss: -0.055055436160829335
          total_loss: -0.07584635822309388
          vf_explained_var: -0.9755364060401917
          vf_loss: 0.001079868047640452
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000
  iterations_since_restore: 313
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,313,3446.03,313000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-09-20_08-57-57
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 315
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0003025081422594
          entropy_coeff: 0.009999999999999998
          kl: 0.010271766292336082
          policy_loss: -0.06437144037336111
          total_loss: -0.0833258282393217
          vf_explained_var: -1.0
          vf_loss: 0.0010486350724628816
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000
  iterations_since_restore: 314
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,314,3456.43,314000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-09-20_08-58-07
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 316
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.300928001933628
          entropy_coeff: 0.009999999999999998
          kl: 0.010843408852153682
          policy_loss: -0.08566222579942809
          total_loss: -0.10737602727280723
          vf_explained_var: -0.5477737784385681
          vf_loss: 0.001295476079556263
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  iterations_since_restore: 315
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,315,3466.55,315000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-09-20_08-58-18
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 317
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2134029706319174
          entropy_coeff: 0.009999999999999998
          kl: 0.015319977625346315
          policy_loss: -0.05018196531261007
          total_loss: -0.07098379149619076
          vf_explained_var: -0.8537254929542542
          vf_loss: 0.0013322040684417718
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  iterations_since_restore: 316
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,316,3476.76,316000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-09-20_08-58-28
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 318
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0998639133241443
          entropy_coeff: 0.009999999999999998
          kl: 0.01118889592750995
          policy_loss: 0.10491653362082111
          total_loss: 0.08516635994116466
          vf_explained_var: -0.44353827834129333
          vf_loss: 0.0012484650664393686
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
  iterations_since_restore: 317
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,317,3487.19,317000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-09-20_08-58-38
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 319
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.999651273091634
          entropy_coeff: 0.009999999999999998
          kl: 0.01046915787037107
          policy_loss: 0.003167700229419602
          total_loss: -0.013338761321372456
          vf_explained_var: -0.30277082324028015
          vf_loss: 0.003490054148197588
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  iterations_since_restore: 318
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,318,3497.59,318000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-09-20_08-58-49
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 320
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0994546916749743
          entropy_coeff: 0.009999999999999998
          kl: 0.01357756778002769
          policy_loss: -0.08296470650368266
          total_loss: -0.1022138940791289
          vf_explained_var: -0.6501365303993225
          vf_loss: 0.0017453555315215554
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  iterations_since_restore: 319
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,319,3507.99,319000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-09-20_08-58-59
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 321
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9345219559139675
          entropy_coeff: 0.009999999999999998
          kl: 0.0127443171520414
          policy_loss: -0.08948735801710023
          total_loss: -0.10778601004017724
          vf_explained_var: -0.6205276250839233
          vf_loss: 0.0010465697353033141
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 320
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,320,3518.32,320000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-09-20_08-59-10
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 322
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9840341713693408
          entropy_coeff: 0.009999999999999998
          kl: 0.015872220059137
          policy_loss: -0.06600541406207615
          total_loss: -0.08351151624487506
          vf_explained_var: 0.24936996400356293
          vf_loss: 0.002334240953334504
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  iterations_since_restore: 321
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,321,3528.63,321000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-09-20_08-59-20
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 323
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.770791945192549
          entropy_coeff: 0.009999999999999998
          kl: 0.013751627675274432
          policy_loss: -0.12050545646084679
          total_loss: -0.13583121283186805
          vf_explained_var: -0.09318994730710983
          vf_loss: 0.0023821631659908843
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
  iterations_since_restore: 322
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,322,3539.21,322000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-09-20_08-59-31
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 324
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8447987371020846
          entropy_coeff: 0.009999999999999998
          kl: 0.014751771552577984
          policy_loss: -0.031162238948875005
          total_loss: -0.04690084093146854
          vf_explained_var: -0.4857788383960724
          vf_loss: 0.0027093824723528493
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000
  iterations_since_restore: 323
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,323,3549.78,323000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-09-20_08-59-41
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 325
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9177843888600667
          entropy_coeff: 0.009999999999999998
          kl: 0.009514682794037792
          policy_loss: -0.1364943855545587
          total_loss: -0.15436610980994173
          vf_explained_var: -0.46343743801116943
          vf_loss: 0.0013061176183530026
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
  iterations_since_restore: 324
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,324,3560.19,324000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-09-20_08-59-52
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 326
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6299496094385784
          entropy_coeff: 0.009999999999999998
          kl: 0.014096748314999116
          policy_loss: 0.04138649163974656
          total_loss: 0.026895137131214143
          vf_explained_var: -0.8668630123138428
          vf_loss: 0.0018081407424890333
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
  iterations_since_restore: 325
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,325,3570.53,325000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-09-20_09-00-02
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 327
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8960623489485846
          entropy_coeff: 0.009999999999999998
          kl: 0.015597530013518284
          policy_loss: 0.0042761256297429405
          total_loss: -0.012390322279598978
          vf_explained_var: -0.6761796474456787
          vf_loss: 0.0022941763777958434
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  iterations_since_restore: 326
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,326,3581.01,326000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-09-20_09-00-12
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 328
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9237391630808511
          entropy_coeff: 0.009999999999999998
          kl: 0.015095128649213101
          policy_loss: -0.09256527506642871
          total_loss: -0.11031974057356517
          vf_explained_var: -0.9910832643508911
          vf_loss: 0.0014829271000861707
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000
  iterations_since_restore: 327
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,327,3591.34,327000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-09-20_09-00-23
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 329
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0319360865486993
          entropy_coeff: 0.009999999999999998
          kl: 0.016778605385882845
          policy_loss: -0.0790940419667297
          total_loss: -0.09816219992935657
          vf_explained_var: -0.7156045436859131
          vf_loss: 0.0012512039043940603
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  iterations_since_restore: 328
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,328,3601.77,328000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-09-20_09-00-33
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 330
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.018252999252743
          entropy_coeff: 0.009999999999999998
          kl: 0.011271112338864124
          policy_loss: -0.04546316969725821
          total_loss: -0.06392278985844718
          vf_explained_var: -1.0
          vf_loss: 0.0017229094396397058
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
  iterations_since_restore: 329
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,329,3611.99,329000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-09-20_09-00-43
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 331
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.055461588170793
          entropy_coeff: 0.009999999999999998
          kl: 0.017087448160798463
          policy_loss: -0.013995432729522387
          total_loss: -0.03360205880469746
          vf_explained_var: -1.0
          vf_loss: 0.000947993131639022
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  iterations_since_restore: 330
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,330,3622.22,330000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-09-20_09-00-54
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 332
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.869243366188473
          entropy_coeff: 0.009999999999999998
          kl: 0.010718171127281077
          policy_loss: 0.0072994300681683754
          total_loss: -0.010349637187189526
          vf_explained_var: 0.194123312830925
          vf_loss: 0.0010433669143822045
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000
  iterations_since_restore: 331
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,331,3632.44,331000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-09-20_09-01-04
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 333
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.092982510725657
          entropy_coeff: 0.009999999999999998
          kl: 0.011210393196241607
          policy_loss: -0.09096401292416785
          total_loss: -0.11025383215811517
          vf_explained_var: -0.8017458319664001
          vf_loss: 0.0016400045566519516
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
  iterations_since_restore: 332
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,332,3642.61,332000,0.14,6,0,989.78


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-09-20_09-01-14
  done: false
  episode_len_mean: 989.78
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 334
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8832682728767396
          entropy_coeff: 0.009999999999999998
          kl: 0.010684431351590007
          policy_loss: -0.060662849847641254
          total_loss: -0.07880938794049952
          vf_explained_var: -0.6499091386795044
          vf_loss: 0.0006861437380494964
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  iterations_since_restore: 333
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,333,3653.08,333000,0.14,6,0,989.78




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-09-20_09-01-42
  done: false
  episode_len_mean: 988.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 336
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6861766987376743
          entropy_coeff: 0.009999999999999998
          kl: 0.011740922374909365
          policy_loss: 0.0827224136226707
          total_loss: 0.06637022553218735
          vf_explained_var: -0.04154425859451294
          vf_loss: 0.000509578434866853
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  iterations_since_restore: 334
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,334,3680.83,334000,0.14,6,0,988.35


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-09-20_09-01-54
  done: false
  episode_len_mean: 988.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 337
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0098135232925416
          entropy_coeff: 0.009999999999999998
          kl: 0.011404378065441279
          policy_loss: -0.00696575144926707
          total_loss: -0.024039374084936248
          vf_explained_var: -0.38618576526641846
          vf_loss: 0.0030245115126793583
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  iterations_since_restore: 335
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,335,3692.96,335000,0.14,6,0,988.35


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-09-20_09-02-04
  done: false
  episode_len_mean: 988.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 338
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.157129028108385
          entropy_coeff: 0.009999999999999998
          kl: 0.009757781414923351
          policy_loss: -0.06758109662267897
          total_loss: -0.08814562062422435
          vf_explained_var: -0.7283045649528503
          vf_loss: 0.001006766526390695
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  iterations_since_restore: 336
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,336,3703.18,336000,0.14,6,0,988.35


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-09-20_09-02-15
  done: false
  episode_len_mean: 988.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 339
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.80059341457155
          entropy_coeff: 0.009999999999999998
          kl: 0.01311276498139065
          policy_loss: -0.007885323133733538
          total_loss: -0.02510514925751421
          vf_explained_var: -1.0
          vf_loss: 0.0007861073652748018
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  iterations_since_restore: 337
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,337,3713.53,337000,0.14,6,0,988.35


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-09-20_09-02-25
  done: false
  episode_len_mean: 989.69
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 340
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1155417025089265
          entropy_coeff: 0.009999999999999998
          kl: 0.017022155285288124
          policy_loss: 0.04762985524204042
          total_loss: 0.037294027540418835
          vf_explained_var: -0.2400083690881729
          vf_loss: 0.0008195892503459213
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  iterations_since_restore: 338
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,338,3723.67,338000,0.14,6,0,989.69


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-09-20_09-02-35
  done: false
  episode_len_mean: 989.69
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 341
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.631358073817359
          entropy_coeff: 0.009999999999999998
          kl: 0.011740357043651646
          policy_loss: -0.005621825282772382
          total_loss: -0.020600022044446734
          vf_explained_var: -0.939819872379303
          vf_loss: 0.001335383860471969
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  iterations_since_restore: 339
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,339,3733.91,339000,0.14,6,0,989.69


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-09-20_09-02-46
  done: false
  episode_len_mean: 989.69
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.14
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 342
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.73986400630739
          entropy_coeff: 0.009999999999999998
          kl: 0.0146552926150071
          policy_loss: -0.013218393052617709
          total_loss: -0.02949643979469935
          vf_explained_var: -0.903781533241272
          vf_loss: 0.001120594557788637
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 340
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,340,3744.2,340000,0.14,6,0,989.69


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-09-20_09-02-56
  done: false
  episode_len_mean: 989.69
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.12
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 343
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.796901528040568
          entropy_coeff: 0.009999999999999998
          kl: 0.010816174865078566
          policy_loss: 0.04513986470798651
          total_loss: 0.027995184332960183
          vf_explained_var: -1.0
          vf_loss: 0.0008243355925919282
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
  iterations_since_restore: 341
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,341,3754.52,341000,0.12,6,0,989.69


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-09-20_09-03-06
  done: false
  episode_len_mean: 990.64
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.08
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 344
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6041576411989
          entropy_coeff: 0.009999999999999998
          kl: 0.00998040307328518
          policy_loss: -0.046206901574300394
          total_loss: -0.06064851118458642
          vf_explained_var: -0.952265739440918
          vf_loss: 0.0015999674344331855
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  iterations_since_restore: 342
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,342,3764.92,342000,0.08,6,0,990.64


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-09-20_09-03-17
  done: false
  episode_len_mean: 990.64
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.07
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 345
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6834089504347907
          entropy_coeff: 0.009999999999999998
          kl: 0.01522900557341684
          policy_loss: -0.02894689641478989
          total_loss: -0.043433654639456004
          vf_explained_var: -0.9027567505836487
          vf_loss: 0.0023473309087825734
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  iterations_since_restore: 343
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,343,3775.22,343000,0.07,6,0,990.64


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-09-20_09-03-27
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 346
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7552874326705932
          entropy_coeff: 0.009999999999999998
          kl: 0.014131106007666282
          policy_loss: -0.01969339499870936
          total_loss: -0.03601232427689764
          vf_explained_var: -0.7425684332847595
          vf_loss: 0.0012339446732463936
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  iterations_since_restore: 344
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,344,3785.54,344000,0.01,1,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-09-20_09-03-37
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 347
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.847985037167867
          entropy_coeff: 0.009999999999999998
          kl: 0.016218574533739212
          policy_loss: -0.018279135185811254
          total_loss: -0.03405156458417575
          vf_explained_var: -0.8972365260124207
          vf_loss: 0.0027074175265928107
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
  iterations_since_restore: 345
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,345,3795.86,345000,0.01,1,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-09-20_09-03-48
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 348
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.010981723997328
          entropy_coeff: 0.009999999999999998
          kl: 0.011256163288154219
          policy_loss: 0.0013257538278897603
          total_loss: -0.01789643350574705
          vf_explained_var: -0.9426462650299072
          vf_loss: 0.0008876274527412736
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  iterations_since_restore: 346
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,346,3806.16,346000,0.01,1,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-09-20_09-03-58
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 349
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8261226892471314
          entropy_coeff: 0.009999999999999998
          kl: 0.012028761787515347
          policy_loss: -0.08466058642499977
          total_loss: -0.10219204119510121
          vf_explained_var: -1.0
          vf_loss: 0.0007297697519081541
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  iterations_since_restore: 347
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,347,3816.52,347000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-09-20_09-04-09
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 350
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.3449564576148987
          entropy_coeff: 0.009999999999999998
          kl: 0.009750839274406826
          policy_loss: 0.021687880655129752
          total_loss: 0.008809427420298258
          vf_explained_var: -0.5450438261032104
          vf_loss: 0.0005711102321381784
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  iterations_since_restore: 348
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,348,3827.1,348000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-09-20_09-04-19
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 351
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6863453639878168
          entropy_coeff: 0.009999999999999998
          kl: 0.010519714096266182
          policy_loss: 0.021620048334201176
          total_loss: 0.006174839205212063
          vf_explained_var: -0.6187828779220581
          vf_loss: 0.001418244550910054
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  iterations_since_restore: 349
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,349,3837.45,349000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-09-20_09-04-29
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 352
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6633166432380677
          entropy_coeff: 0.009999999999999998
          kl: 0.007931560774546122
          policy_loss: -0.006783666171961361
          total_loss: -0.022806470634208785
          vf_explained_var: -1.0
          vf_loss: 0.0006103645528330365
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  iterations_since_restore: 350
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,350,3847.75,350000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-09-20_09-04-40
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 353
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.752258343166775
          entropy_coeff: 0.009999999999999998
          kl: 0.00869888092202989
          policy_loss: -0.0033029888653092914
          total_loss: -0.020373736073573432
          vf_explained_var: -1.0
          vf_loss: 0.00045183534126004414
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  iterations_since_restore: 351
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,351,3858.07,351000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-09-20_09-04-50
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 354
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8676979568269518
          entropy_coeff: 0.009999999999999998
          kl: 0.01804413396074934
          policy_loss: -0.013724397122859954
          total_loss: -0.03205017372965813
          vf_explained_var: -0.9989205002784729
          vf_loss: 0.00035120367522015135
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  iterations_since_restore: 352
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,352,3868.46,352000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-09-20_09-05-00
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 355
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9584301657146879
          entropy_coeff: 0.009999999999999998
          kl: 0.010644265267026595
          policy_loss: 0.017947673425078393
          total_loss: -0.001247640699148178
          vf_explained_var: -0.908778190612793
          vf_loss: 0.0003889875425051691
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  iterations_since_restore: 353
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,353,3878.81,353000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-09-20_09-05-11
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 356
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8060213605562845
          entropy_coeff: 0.009999999999999998
          kl: 0.01228680911946248
          policy_loss: -0.042320972349908614
          total_loss: -0.060035730939772394
          vf_explained_var: -1.0
          vf_loss: 0.00034545447997516023
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  iterations_since_restore: 354
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,354,3889.16,354000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-09-20_09-05-21
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 357
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8955860561794704
          entropy_coeff: 0.009999999999999998
          kl: 0.013961451595268393
          policy_loss: -0.05520150578684277
          total_loss: -0.07386516883141464
          vf_explained_var: -1.0
          vf_loss: 0.00029219357683258647
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
  iterations_since_restore: 355
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,355,3899.42,355000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-09-20_09-05-31
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 358
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8901180028915405
          entropy_coeff: 0.009999999999999998
          kl: 0.009938167271196442
          policy_loss: -0.04352460551179117
          total_loss: -0.0622464744374156
          vf_explained_var: -0.7982991337776184
          vf_loss: 0.00017931225536611036
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
  iterations_since_restore: 356
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,356,3909.7,356000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-09-20_09-05-42
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 359
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8760122484631008
          entropy_coeff: 0.009999999999999998
          kl: 0.012122510533265185
          policy_loss: -0.059611778230302864
          total_loss: -0.07806617522405254
          vf_explained_var: -1.0
          vf_loss: 0.00030572271837930507
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  iterations_since_restore: 357
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,357,3920.04,357000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-09-20_09-05-52
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 360
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8127271387312147
          entropy_coeff: 0.009999999999999998
          kl: 0.012520109630799976
          policy_loss: -0.03161048835350407
          total_loss: -0.04927269696361489
          vf_explained_var: -0.7498488426208496
          vf_loss: 0.00046506053590241614
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  iterations_since_restore: 358
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,358,3930.37,358000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-09-20_09-06-03
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 361
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8401539007822671
          entropy_coeff: 0.009999999999999998
          kl: 0.00822200914964968
          policy_loss: -0.04551645898156696
          total_loss: -0.06371384888059563
          vf_explained_var: -1.0
          vf_loss: 0.00020414852778129797
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
  iterations_since_restore: 359
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,359,3940.72,359000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-09-20_09-06-13
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 362
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.018242863814036
          entropy_coeff: 0.009999999999999998
          kl: 0.01379720221978579
          policy_loss: 0.00730291861626837
          total_loss: -0.012738024608956443
          vf_explained_var: -0.5952121019363403
          vf_loss: 0.00014148283654422914
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 360
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,360,3950.94,360000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-09-20_09-06-23
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 363
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7608356727494134
          entropy_coeff: 0.009999999999999998
          kl: 0.011436990897925704
          policy_loss: -0.04346255974637137
          total_loss: -0.060747055812842315
          vf_explained_var: -0.9222167730331421
          vf_loss: 0.0003238599901022907
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  iterations_since_restore: 361
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,361,3961.21,361000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-09-20_09-06-33
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 364
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.013928224187112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8843482984436883
          entropy_coeff: 0.009999999999999998
          kl: 0.020601910182461856
          policy_loss: -0.025961029902100564
          total_loss: -0.04415833467824592
          vf_explained_var: -0.9952840209007263
          vf_loss: 0.0006461766496714619
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  iterations_since_restore: 362
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,362,3971.61,362000,0,0,0,995.81


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-09-20_09-06-44
  done: false
  episode_len_mean: 995.81
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 365
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8940867529975043
          entropy_coeff: 0.009999999999999998
          kl: 0.008345625955042815
          policy_loss: -0.0035451754099792903
          total_loss: -0.02223045258886284
          vf_explained_var: -0.37951958179473877
          vf_loss: 0.0002555903723178845
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  iterations_since_restore: 363
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,363,3981.91,363000,0,0,0,995.81




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-09-20_09-07-11
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 366
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9107410351435343
          entropy_coeff: 0.009999999999999998
          kl: 0.01443156061392392
          policy_loss: 0.0036829425228966607
          total_loss: -0.015132850118809276
          vf_explained_var: -0.909946858882904
          vf_loss: 0.0002916176770264024
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  iterations_since_restore: 364
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,364,4009.44,364000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-09-20_09-07-24
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 367
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7947554654545255
          entropy_coeff: 0.009999999999999998
          kl: 0.017920075823055253
          policy_loss: -0.0311989544166459
          total_loss: -0.04891816746029589
          vf_explained_var: -1.0
          vf_loss: 0.00022834085838338878
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  iterations_since_restore: 365
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,365,4021.63,365000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-09-20_09-07-34
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 368
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9446747501691182
          entropy_coeff: 0.009999999999999998
          kl: 0.015439438547156294
          policy_loss: -0.024097419312844672
          total_loss: -0.04309532623738051
          vf_explained_var: -1.0
          vf_loss: 0.00044884000833715415
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  iterations_since_restore: 366
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,366,4031.98,366000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-09-20_09-07-44
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 369
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9696792748239305
          entropy_coeff: 0.009999999999999998
          kl: 0.01442175566649979
          policy_loss: -0.07470869256390465
          total_loss: -0.09398553768793742
          vf_explained_var: -0.6688672304153442
          vf_loss: 0.00041994651773064913
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
  iterations_since_restore: 367
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,367,4042.36,367000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-09-20_09-07-55
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 370
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9751529852549234
          entropy_coeff: 0.009999999999999998
          kl: 0.01411486182110604
          policy_loss: -0.07122141482929388
          total_loss: -0.09072381841639678
          vf_explained_var: -0.7635213136672974
          vf_loss: 0.0002491248732743164
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  iterations_since_restore: 368
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,368,4052.72,368000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-09-20_09-08-05
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 371
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7838236742549471
          entropy_coeff: 0.009999999999999998
          kl: 0.008548316015033643
          policy_loss: -0.027441255665487715
          total_loss: -0.044171701040532856
          vf_explained_var: -0.5473139882087708
          vf_loss: 0.0011077897697557798
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  iterations_since_restore: 369
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,369,4062.93,369000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-09-20_09-08-15
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 372
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8879713773727418
          entropy_coeff: 0.009999999999999998
          kl: 0.01686876286866238
          policy_loss: -0.04249337104459604
          total_loss: -0.061081312720974286
          vf_explained_var: -0.914550244808197
          vf_loss: 0.0002917717775239402
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  iterations_since_restore: 370
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,370,4073.25,370000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-09-20_09-08-25
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 373
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1834172507127125
          entropy_coeff: 0.009999999999999998
          kl: 0.010065540335926887
          policy_loss: -0.03541559477647146
          total_loss: -0.04293858036398888
          vf_explained_var: -1.0
          vf_loss: 0.004311185464676883
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000
  iterations_since_restore: 371
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,371,4083.38,371000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-09-20_09-08-36
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 374
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8831527670224508
          entropy_coeff: 0.009999999999999998
          kl: 0.008604920252512511
          policy_loss: -0.05704192680617173
          total_loss: -0.07541844488845931
          vf_explained_var: -0.8026520609855652
          vf_loss: 0.0004550106084530449
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  iterations_since_restore: 372
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,372,4093.65,372000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-09-20_09-08-46
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 375
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.262693785296546
          entropy_coeff: 0.009999999999999998
          kl: 0.01515705861166246
          policy_loss: -0.05432668384164572
          total_loss: -0.06453993349439568
          vf_explained_var: -0.6152443885803223
          vf_loss: 0.0024136875882201517
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
  iterations_since_restore: 373
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,373,4103.72,373000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-09-20_09-08-56
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 376
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0041294627719455
          entropy_coeff: 0.009999999999999998
          kl: 0.019824417719006386
          policy_loss: -0.025136521955331167
          total_loss: -0.044563160671128164
          vf_explained_var: -0.7615750432014465
          vf_loss: 0.0006146559462649748
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  iterations_since_restore: 374
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,374,4114.16,374000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-09-20_09-09-07
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 377
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9977169659402636
          entropy_coeff: 0.009999999999999998
          kl: 0.01244499084020752
          policy_loss: 0.0011213242179817623
          total_loss: -0.017559928033086987
          vf_explained_var: -0.19856683909893036
          vf_loss: 0.0012959182566393995
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  iterations_since_restore: 375
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,375,4124.48,375000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-09-20_09-09-17
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 378
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0742574863963656
          entropy_coeff: 0.009999999999999998
          kl: 0.011141728384185147
          policy_loss: -0.021854462433192466
          total_loss: -0.041986641743116906
          vf_explained_var: -0.5035164952278137
          vf_loss: 0.0006103945512829038
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  iterations_since_restore: 376
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,376,4134.79,376000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-09-20_09-09-27
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 379
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9896836837132772
          entropy_coeff: 0.009999999999999998
          kl: 0.016999348225088796
          policy_loss: -0.022110924797339573
          total_loss: -0.04134418129300078
          vf_explained_var: -1.0
          vf_loss: 0.0006635789892041228
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
  iterations_since_restore: 377
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,377,4145.12,377000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-09-20_09-09-37
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 380
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9655896716647678
          entropy_coeff: 0.009999999999999998
          kl: 0.013959467392985264
          policy_loss: 0.0073762545569075475
          total_loss: -0.011407380054394404
          vf_explained_var: -0.8866796493530273
          vf_loss: 0.0008722611450745414
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  iterations_since_restore: 378
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,378,4155.23,378000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-09-20_09-09-48
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 381
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1679245948791506
          entropy_coeff: 0.009999999999999998
          kl: 0.010193247935443904
          policy_loss: -0.05607626001454062
          total_loss: -0.07728864571286573
          vf_explained_var: -1.0
          vf_loss: 0.0004668598362412821
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  iterations_since_restore: 379
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,379,4165.49,379000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-09-20_09-09-58
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 382
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1276786539289687
          entropy_coeff: 0.009999999999999998
          kl: 0.014007082463956759
          policy_loss: 0.07180367733041446
          total_loss: 0.05156209899319543
          vf_explained_var: -0.7932374477386475
          vf_loss: 0.001035207939437694
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  iterations_since_restore: 380
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,380,4175.79,380000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-09-20_09-10-08
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 383
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1272888739903766
          entropy_coeff: 0.009999999999999998
          kl: 0.01321182049193668
          policy_loss: -0.005614537476665444
          total_loss: -0.0261874218367868
          vf_explained_var: -0.16491971909999847
          vf_loss: 0.0007000008804930581
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  iterations_since_restore: 381
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,381,4186.08,381000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-09-20_09-10-19
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 384
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.12733338409
          entropy_coeff: 0.009999999999999998
          kl: 0.018021790656041696
          policy_loss: -0.04921826480163468
          total_loss: -0.06978708472516802
          vf_explained_var: -1.0
          vf_loss: 0.0007045122461729786
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  iterations_since_restore: 382
  node_ip: 192.168.1.100
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,382,4196.45,382000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-09-20_09-10-29
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 385
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0930887725618152
          entropy_coeff: 0.009999999999999998
          kl: 0.009002141618687896
          policy_loss: -0.00790451533264584
          total_loss: -0.027915518689486715
          vf_explained_var: -1.0
          vf_loss: 0.0009198867153221121
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  iterations_since_restore: 383
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,383,4206.7,383000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-09-20_09-10-39
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 386
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.183514263894823
          entropy_coeff: 0.009999999999999998
          kl: 0.011655875579318142
          policy_loss: -0.015509348776605394
          total_loss: -0.03652883134782314
          vf_explained_var: -1.0
          vf_loss: 0.0008156601145553092
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
  iterations_since_restore: 384
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,384,4216.94,384000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-09-20_09-10-50
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 387
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1962050278981526
          entropy_coeff: 0.009999999999999998
          kl: 0.015049018227102057
          policy_loss: -0.02935176950155033
          total_loss: -0.050669442489743234
          vf_explained_var: -1.0
          vf_loss: 0.0006443746145426606
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  iterations_since_restore: 385
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,385,4227.22,385000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-09-20_09-11-00
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 388
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9845051553514268
          entropy_coeff: 0.009999999999999998
          kl: 0.01108235381229682
          policy_loss: -0.01955809982286559
          total_loss: -0.03806338699327575
          vf_explained_var: -1.0
          vf_loss: 0.0013397639119325
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  iterations_since_restore: 386
  node_ip: 192.168.1.100
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,386,4237.42,386000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-09-20_09-11-10
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 389
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6920095403989157
          entropy_coeff: 0.009999999999999998
          kl: 0.01067661874949353
          policy_loss: -0.01796481278207567
          total_loss: -0.03184242389268345
          vf_explained_var: -1.0
          vf_loss: 0.0030424835723048696
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  iterations_since_restore: 387
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,387,4247.68,387000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-09-20_09-11-21
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 390
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0679650518629287
          entropy_coeff: 0.009999999999999998
          kl: 0.008847225883726474
          policy_loss: -0.02650242687927352
          total_loss: -0.04638468482428127
          vf_explained_var: -1.0
          vf_loss: 0.0007973933857606931
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  iterations_since_restore: 388
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,388,4258.04,388000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-09-20_09-11-31
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 391
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1893095864189998
          entropy_coeff: 0.009999999999999998
          kl: 0.011778919438998336
          policy_loss: -0.01575683289104038
          total_loss: -0.037202921095821594
          vf_explained_var: -0.9992836117744446
          vf_loss: 0.0004470042473662438
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  iterations_since_restore: 389
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,389,4268.32,389000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-09-20_09-11-41
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 392
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.51615815560023
          entropy_coeff: 0.009999999999999998
          kl: 0.01178433627454106
          policy_loss: 0.009203121024701331
          total_loss: -0.004008252835936017
          vf_explained_var: -1.0
          vf_loss: 0.001950208559477081
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  iterations_since_restore: 390
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,390,4278.61,390000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-09-20_09-11-52
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 393
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.120890692869822
          entropy_coeff: 0.009999999999999998
          kl: 0.008948308325880502
          policy_loss: -0.03489333970679177
          total_loss: -0.05575164585477776
          vf_explained_var: -1.0
          vf_loss: 0.0003506001143250614
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  iterations_since_restore: 391
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,391,4288.98,391000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-09-20_09-12-02
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 394
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0590346309873793
          entropy_coeff: 0.009999999999999998
          kl: 0.008891718271663823
          policy_loss: -0.023455757151047387
          total_loss: -0.043687461606330344
          vf_explained_var: -1.0
          vf_loss: 0.0003586399415022849
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  iterations_since_restore: 392
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,392,4299.25,392000,0,0,0,995.84


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-09-20_09-12-12
  done: false
  episode_len_mean: 995.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 395
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.5141222463713753
          entropy_coeff: 0.009999999999999998
          kl: 0.01793582571966519
          policy_loss: 0.014688601013686922
          total_loss: 0.0011882549358738794
          vf_explained_var: -0.8888610005378723
          vf_loss: 0.0016408760055330479
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  iterations_since_restore: 393
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,393,4309.37,393000,0,0,0,995.84




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-09-20_09-12-39
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 396
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.127577112780677
          entropy_coeff: 0.009999999999999998
          kl: 0.015793868331261347
          policy_loss: -0.010362546932366158
          total_loss: -0.030955348246627382
          vf_explained_var: -1.0
          vf_loss: 0.0006829679371245826
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  iterations_since_restore: 394
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,394,4336.74,394000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-09-20_09-12-51
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 397
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7617388248443604
          entropy_coeff: 0.009999999999999998
          kl: 0.007103509349581769
          policy_loss: -0.008674983431895574
          total_loss: -0.02496060993936327
          vf_explained_var: -0.5118249654769897
          vf_loss: 0.0013317615550477058
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  iterations_since_restore: 395
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,395,4348.82,395000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-09-20_09-13-02
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 398
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.842695865366194
          entropy_coeff: 0.009999999999999998
          kl: 0.0190077024573955
          policy_loss: -0.01963281962606642
          total_loss: -0.03584142393536038
          vf_explained_var: -0.357391357421875
          vf_loss: 0.002218351809359673
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  iterations_since_restore: 396
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,396,4359.18,396000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-09-20_09-13-12
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 399
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.887318774064382
          entropy_coeff: 0.009999999999999998
          kl: 0.015217271962013355
          policy_loss: 0.042066564452317025
          total_loss: 0.02390293065044615
          vf_explained_var: -0.012113730423152447
          vf_loss: 0.0007095546047720644
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  iterations_since_restore: 397
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,397,4369.07,397000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-09-20_09-13-22
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 400
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.248635032441881
          entropy_coeff: 0.009999999999999998
          kl: 0.011649609874276488
          policy_loss: -0.04934788718819618
          total_loss: -0.07101227976381778
          vf_explained_var: -1.0
          vf_loss: 0.0008219565410399809
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
  iterations_since_restore: 398
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,398,4379.32,398000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-09-20_09-13-32
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 401
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8245564301808674
          entropy_coeff: 0.009999999999999998
          kl: 0.010889118865228713
          policy_loss: -0.04277408296863238
          total_loss: -0.06005518306046724
          vf_explained_var: -0.3481798470020294
          vf_loss: 0.0009644647471658472
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  iterations_since_restore: 399
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,399,4389.26,399000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-09-20_09-13-42
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 402
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.88220444255405
          entropy_coeff: 0.009999999999999998
          kl: 0.012566795575613732
          policy_loss: -0.03787773030085696
          total_loss: -0.056046359530753556
          vf_explained_var: -0.586470901966095
          vf_loss: 0.0006534134742752131
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  iterations_since_restore: 400
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,400,4399.33,400000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-09-20_09-13-52
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 403
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7362230446603564
          entropy_coeff: 0.009999999999999998
          kl: 0.0076764311162143195
          policy_loss: -0.002258036761648125
          total_loss: -0.018769424657026926
          vf_explained_var: -1.0
          vf_loss: 0.0008508415863616392
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  iterations_since_restore: 401
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,401,4409.41,401000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-09-20_09-14-02
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 404
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.5239896880255805
          entropy_coeff: 0.009999999999999998
          kl: 0.007638299864328271
          policy_loss: -0.13202940714028147
          total_loss: -0.1464924776719676
          vf_explained_var: -1.0
          vf_loss: 0.0007768257249457141
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  iterations_since_restore: 402
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,402,4419.42,402000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-09-20_09-14-12
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 405
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.153659506638845
          entropy_coeff: 0.009999999999999998
          kl: 0.014773792924781612
          policy_loss: -0.041073141247034074
          total_loss: -0.06176530093782478
          vf_explained_var: -0.8249997496604919
          vf_loss: 0.0008444342073441173
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iterations_since_restore: 403
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,403,4429.44,403000,0,0,0,994.47


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-09-20_09-14-22
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 406
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8758321907785205
          entropy_coeff: 0.009999999999999998
          kl: 0.014594222640385773
          policy_loss: -0.023162182172139487
          total_loss: -0.04119448024365637
          vf_explained_var: -0.6061234474182129
          vf_loss: 0.0007260251146767082
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iterations_since_restore: 404
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,404,4439.6,404000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-09-20_09-14-32
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 407
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9603298015064663
          entropy_coeff: 0.009999999999999998
          kl: 0.014249025497860614
          policy_loss: -0.08462123697002728
          total_loss: -0.10354243737335006
          vf_explained_var: -0.6235842108726501
          vf_loss: 0.0006820975599111989
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iterations_since_restore: 405
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,405,4449.42,405000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-09-20_09-14-42
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 408
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7175604343414306
          entropy_coeff: 0.009999999999999998
          kl: 0.008648766008316539
          policy_loss: 0.0005195174780156877
          total_loss: -0.015671607934766345
          vf_explained_var: -1.0
          vf_loss: 0.000984480088421454
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  iterations_since_restore: 406
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,406,4459.54,406000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-09-20_09-14-53
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 409
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.88604410621855
          entropy_coeff: 0.009999999999999998
          kl: 0.008752411050349067
          policy_loss: -0.012963990287648306
          total_loss: -0.031180314264363712
          vf_explained_var: -1.0
          vf_loss: 0.0006441161637970557
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  iterations_since_restore: 407
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,407,4469.6,407000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-09-20_09-15-02
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 410
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8149643659591674
          entropy_coeff: 0.009999999999999998
          kl: 0.01414745965735734
          policy_loss: 0.019650072211192714
          total_loss: 0.0023350149186121094
          vf_explained_var: -0.822951078414917
          vf_loss: 0.000834584978616072
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  iterations_since_restore: 408
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,408,4479.49,408000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-09-20_09-15-13
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 411
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8500281651814778
          entropy_coeff: 0.009999999999999998
          kl: 0.00927901399502375
          policy_loss: -0.0012130794632765982
          total_loss: -0.018756543948418563
          vf_explained_var: -1.0
          vf_loss: 0.0009568152897473838
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iterations_since_restore: 409
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,409,4489.73,409000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-09-20_09-15-23
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 412
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7751640677452087
          entropy_coeff: 0.009999999999999998
          kl: 0.013256705602378658
          policy_loss: 0.014572377047604985
          total_loss: -0.0024089750937289663
          vf_explained_var: -1.0
          vf_loss: 0.0007702889360694422
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  iterations_since_restore: 410
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,410,4499.86,410000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-09-20_09-15-33
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 413
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9810076859262256
          entropy_coeff: 0.009999999999999998
          kl: 0.011751336577877226
          policy_loss: -0.018397787503070303
          total_loss: -0.0373637487904893
          vf_explained_var: -0.8074290156364441
          vf_loss: 0.0008441138123291441
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  iterations_since_restore: 411
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,411,4510.01,411000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-09-20_09-15-43
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 414
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9855541825294494
          entropy_coeff: 0.009999999999999998
          kl: 0.014923661271975483
          policy_loss: 0.0046419286893473735
          total_loss: -0.014035247804390059
          vf_explained_var: -0.9170695543289185
          vf_loss: 0.0011783669566890846
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  iterations_since_restore: 412
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,412,4519.8,412000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-09-20_09-15-53
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 415
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9512968500455221
          entropy_coeff: 0.009999999999999998
          kl: 0.011033304839286847
          policy_loss: 0.038532055252128175
          total_loss: 0.01989890717797809
          vf_explained_var: -0.8815438151359558
          vf_loss: 0.0008798206128024807
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  iterations_since_restore: 413
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,413,4529.88,413000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-09-20_09-16-03
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 416
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0534922348128424
          entropy_coeff: 0.009999999999999998
          kl: 0.012572720858917648
          policy_loss: 0.07020752835604879
          total_loss: 0.05013747604356872
          vf_explained_var: -0.7662250995635986
          vf_loss: 0.0004648681733265726
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  iterations_since_restore: 414
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,414,4539.84,414000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-09-20_09-16-13
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 417
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7658489227294922
          entropy_coeff: 0.009999999999999998
          kl: 0.008979541021363453
          policy_loss: -0.04591336213052273
          total_loss: -0.062014836859371925
          vf_explained_var: -0.7272603511810303
          vf_loss: 0.0015570134092639718
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
  iterations_since_restore: 415
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,415,4550.07,415000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-09-20_09-16-23
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 418
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2604023377100626
          entropy_coeff: 0.009999999999999998
          kl: 0.016887869376378577
          policy_loss: 0.02624931074678898
          total_loss: 0.004148087567753262
          vf_explained_var: -0.5186280012130737
          vf_loss: 0.0005027988024974345
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  iterations_since_restore: 416
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,416,4559.98,416000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-09-20_09-16-33
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 419
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9015836225615608
          entropy_coeff: 0.009999999999999998
          kl: 0.01033781954703825
          policy_loss: -0.05674770399928093
          total_loss: -0.07474231637186474
          vf_explained_var: -0.9916958212852478
          vf_loss: 0.001021224367690997
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
  iterations_since_restore: 417
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,417,4570.29,417000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-09-20_09-16-43
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 420
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2653036726845635
          entropy_coeff: 0.009999999999999998
          kl: 0.009037512351818627
          policy_loss: 0.017232450884249476
          total_loss: -0.0049455128610134125
          vf_explained_var: -0.7787889838218689
          vf_loss: 0.00047507262335986725
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
  iterations_since_restore: 418
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,418,4580.23,418000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-09-20_09-16-54
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 421
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1327550027105544
          entropy_coeff: 0.009999999999999998
          kl: 0.014642114727824316
          policy_loss: 0.02356784012582567
          total_loss: 0.003103982015616364
          vf_explained_var: -0.9752809405326843
          vf_loss: 0.000863692108153676
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  iterations_since_restore: 419
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,419,4590.51,419000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-09-20_09-17-04
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 422
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6354303465949165
          entropy_coeff: 0.009999999999999998
          kl: 0.008617446109906623
          policy_loss: -0.038220737170841956
          total_loss: -0.05342960419754187
          vf_explained_var: -1.0
          vf_loss: 0.0011454374393603454
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
  iterations_since_restore: 420
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,420,4600.76,420000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-09-20_09-17-14
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 423
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2718797551261054
          entropy_coeff: 0.009999999999999998
          kl: 0.011064328333370099
          policy_loss: -0.06528171077370644
          total_loss: -0.08745999932289124
          vf_explained_var: -0.19362862408161163
          vf_loss: 0.0005405061257786454
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
  iterations_since_restore: 421
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,421,4610.62,421000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-09-20_09-17-24
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 424
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.26447815100352
          entropy_coeff: 0.009999999999999998
          kl: 0.016040847887281984
          policy_loss: -0.10919456978638967
          total_loss: -0.13119404655363825
          vf_explained_var: -0.8435884118080139
          vf_loss: 0.0006453021714454128
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
  iterations_since_restore: 422
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,422,4620.57,422000,0,0,0,995.86


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-09-20_09-17-33
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 425
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.418157214588589
          entropy_coeff: 0.009999999999999998
          kl: 0.013828412672681109
          policy_loss: 0.059731859465440115
          total_loss: 0.03582818251517084
          vf_explained_var: -0.6588250398635864
          vf_loss: 0.0002778951925898178
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  iterations_since_restore: 423
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,423,4629.98,423000,0,0,0,995.86




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-09-20_09-17-59
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 426
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3883513265185887
          entropy_coeff: 0.009999999999999998
          kl: 0.015359644521368824
          policy_loss: -0.014467311857475174
          total_loss: -0.037395282255278696
          vf_explained_var: -0.7941173315048218
          vf_loss: 0.0009555438301403127
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  iterations_since_restore: 424
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,424,4655.92,424000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-09-20_09-18-10
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 427
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.423251173231337
          entropy_coeff: 0.009999999999999998
          kl: 0.01425986056848781
          policy_loss: -0.06936195741097133
          total_loss: -0.09320947585834397
          vf_explained_var: -0.7597981691360474
          vf_loss: 0.00038499316376853837
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  iterations_since_restore: 425
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,425,4666.99,425000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-09-20_09-18-20
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 428
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.449131366941664
          entropy_coeff: 0.009999999999999998
          kl: 0.010963478689821353
          policy_loss: 0.0709134767866797
          total_loss: 0.047034753517558175
          vf_explained_var: -0.729590117931366
          vf_loss: 0.0006125893084370343
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000
  iterations_since_restore: 426
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,426,4676.45,426000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-09-20_09-18-29
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 429
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.477964260843065
          entropy_coeff: 0.009999999999999998
          kl: 0.013855818491856513
          policy_loss: -0.0676032529781676
          total_loss: -0.09202060780177514
          vf_explained_var: -0.6305869817733765
          vf_loss: 0.0003622862968768459
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
  iterations_since_restore: 427
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,427,4686.02,427000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-09-20_09-18-39
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 430
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.343858427471585
          entropy_coeff: 0.009999999999999998
          kl: 0.009112387109810684
          policy_loss: -0.10426198744939433
          total_loss: -0.12649172983235782
          vf_explained_var: -0.9401228427886963
          vf_loss: 0.0012088412909684444
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  iterations_since_restore: 428
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,428,4696.09,428000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-09-20_09-18-50
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 431
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9171402547094556
          entropy_coeff: 0.009999999999999998
          kl: 0.016752931209076824
          policy_loss: 0.02415258079353306
          total_loss: 0.007075648133953412
          vf_explained_var: -0.43910983204841614
          vf_loss: 0.002094467815348051
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
  iterations_since_restore: 429
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,429,4706.79,429000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-09-20_09-19-00
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 432
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.020892336280666e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1103263020515444
          entropy_coeff: 0.009999999999999998
          kl: 0.03103956128686652
          policy_loss: 0.0019431047141551972
          total_loss: -0.01776521445976363
          vf_explained_var: -0.44215795397758484
          vf_loss: 0.0013949446030892432
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000
  iterations_since_restore: 430
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,430,4716.84,430000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-09-20_09-19-09
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 433
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.531338504421002e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3913918018341063
          entropy_coeff: 0.009999999999999998
          kl: 0.015975208837678816
          policy_loss: -0.005431823949846957
          total_loss: -0.028695551223225063
          vf_explained_var: -0.7926995754241943
          vf_loss: 0.0006501909013751275
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000
  iterations_since_restore: 431
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,431,4725.95,431000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-09-20_09-19-19
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 434
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.531338504421002e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4539592213100856
          entropy_coeff: 0.009999999999999998
          kl: 0.01366429765036158
          policy_loss: -0.052888586454921296
          total_loss: -0.07701334555943808
          vf_explained_var: -0.7365373373031616
          vf_loss: 0.0004148297796240917
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
  iterations_since_restore: 432
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,432,4735.09,432000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-09-20_09-19-28
  done: false
  episode_len_mean: 994.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 435
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.531338504421002e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.5078128655751546
          entropy_coeff: 0.009999999999999998
          kl: 0.012648511831503366
          policy_loss: 0.0030092047941353587
          total_loss: -0.0213298582782348
          vf_explained_var: -0.9660525918006897
          vf_loss: 0.0007390664229799565
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000
  iterations_since_restore: 433
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,433,4744.75,433000,0,0,0,994.48


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-09-20_09-19-38
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 436
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.531338504421002e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.41903334458669
          entropy_coeff: 0.009999999999999998
          kl: 0.02248162878612929
          policy_loss: -0.042180736838943425
          total_loss: -0.06605845321383741
          vf_explained_var: -0.5307083129882812
          vf_loss: 0.0003126164611684443
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  iterations_since_restore: 434
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,434,4754.07,434000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-09-20_09-19-47
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 437
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.286051016383701
          entropy_coeff: 0.009999999999999998
          kl: 0.01939365002079206
          policy_loss: -0.012248830041951604
          total_loss: -0.034814508135120076
          vf_explained_var: -0.5995168685913086
          vf_loss: 0.0002948288839257253
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
  iterations_since_restore: 435
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,435,4763.2,435000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-09-20_09-19-56
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 438
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.135689093006982
          entropy_coeff: 0.009999999999999998
          kl: 0.010407115137578889
          policy_loss: 0.008046145861347517
          total_loss: -0.012887900819381078
          vf_explained_var: -0.2901643216609955
          vf_loss: 0.00042284196438736724
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  iterations_since_restore: 436
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,436,4772.83,436000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-09-20_09-20-06
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 439
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.260105383396149
          entropy_coeff: 0.009999999999999998
          kl: 0.01041690857483941
          policy_loss: -0.02864573618604077
          total_loss: -0.05089643576906787
          vf_explained_var: -0.9666826725006104
          vf_loss: 0.0003503555294325148
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  iterations_since_restore: 437
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,437,4782.28,437000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-09-20_09-20-16
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 440
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9847181306944952
          entropy_coeff: 0.009999999999999998
          kl: 0.015732842634762968
          policy_loss: -0.01738196810086568
          total_loss: -0.036423375374741024
          vf_explained_var: -0.4350675940513611
          vf_loss: 0.0008057752918426154
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000
  iterations_since_restore: 438
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,438,4792.24,438000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-09-20_09-20-26
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 441
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.568567763434516
          entropy_coeff: 0.009999999999999998
          kl: 0.010842136160838529
          policy_loss: -0.0722047969698906
          total_loss: -0.08682446430126826
          vf_explained_var: -1.0
          vf_loss: 0.0010660115273721101
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  iterations_since_restore: 439
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,439,4802.5,439000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-09-20_09-20-36
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 442
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.5455938180287678
          entropy_coeff: 0.009999999999999998
          kl: 0.008495242507657885
          policy_loss: -0.06916968810061613
          total_loss: -0.083742249591483
          vf_explained_var: -0.8965606093406677
          vf_loss: 0.0008833760848372346
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  iterations_since_restore: 440
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,440,4812.77,440000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-09-20_09-20-46
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 443
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0090637697113887
          entropy_coeff: 0.009999999999999998
          kl: 0.01435259235531241
          policy_loss: -0.04711400953431924
          total_loss: -0.06647835713293818
          vf_explained_var: -0.5098564624786377
          vf_loss: 0.0007262896369664101
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  iterations_since_restore: 441
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,441,4822.71,441000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-09-20_09-20-57
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 444
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7003364112642076
          entropy_coeff: 0.009999999999999998
          kl: 0.009381924906991893
          policy_loss: 0.006033558481269413
          total_loss: -0.010090028618772824
          vf_explained_var: -0.9948444962501526
          vf_loss: 0.0008797740117491534
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
  iterations_since_restore: 442
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,442,4832.98,442000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-09-20_09-21-07
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 445
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7645020100805495
          entropy_coeff: 0.009999999999999998
          kl: 0.013975933429776216
          policy_loss: 0.02170085629655255
          total_loss: 0.004935256267587344
          vf_explained_var: -1.0
          vf_loss: 0.0008794171651566608
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  iterations_since_restore: 443
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,443,4843.24,443000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-09-20_09-21-17
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 446
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8919011765056186
          entropy_coeff: 0.009999999999999998
          kl: 0.009144859059296372
          policy_loss: -0.0335427795847257
          total_loss: -0.05168735128309992
          vf_explained_var: -0.8090489506721497
          vf_loss: 0.0007744388580451616
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  iterations_since_restore: 444
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,444,4853.2,444000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-09-20_09-21-27
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 447
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9693574521276687
          entropy_coeff: 0.009999999999999998
          kl: 0.013708490805028836
          policy_loss: 0.010003396537568834
          total_loss: -0.008823192119598389
          vf_explained_var: -0.49612298607826233
          vf_loss: 0.0008669860783912655
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
  iterations_since_restore: 445
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,445,4863.12,445000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-09-20_09-21-37
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 448
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7114546007580227
          entropy_coeff: 0.009999999999999998
          kl: 0.011298628747698361
          policy_loss: -0.06625102447966734
          total_loss: -0.08206606879830361
          vf_explained_var: -0.4813457131385803
          vf_loss: 0.0012995030232963876
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  iterations_since_restore: 446
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,446,4873,446000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-09-20_09-21-47
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 449
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7693897432751127
          entropy_coeff: 0.009999999999999998
          kl: 0.01543101340630852
          policy_loss: -0.005809616545836131
          total_loss: -0.02266032596429189
          vf_explained_var: -0.5947757959365845
          vf_loss: 0.0008431879476928669
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  iterations_since_restore: 447
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,447,4883.16,447000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-09-20_09-21-57
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 450
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9774523496627807
          entropy_coeff: 0.009999999999999998
          kl: 0.01216032046299781
          policy_loss: -0.07607257589697838
          total_loss: -0.09524503176410994
          vf_explained_var: -0.3953596353530884
          vf_loss: 0.0006020670423620484
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  iterations_since_restore: 448
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,448,4893.17,448000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-09-20_09-22-07
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 451
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.087748302353753
          entropy_coeff: 0.009999999999999998
          kl: 0.012773258139804117
          policy_loss: 0.044452520459890364
          total_loss: 0.02377018787794643
          vf_explained_var: -0.624605655670166
          vf_loss: 0.00019514752744321918
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  iterations_since_restore: 449
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,449,4903.12,449000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-09-20_09-22-16
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 452
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.4170363585154218
          entropy_coeff: 0.009999999999999998
          kl: 0.01120937941222052
          policy_loss: 0.01051494831012355
          total_loss: -0.013267899428804715
          vf_explained_var: -0.7716747522354126
          vf_loss: 0.00038751446867940506
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  iterations_since_restore: 450
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,450,4912.56,450000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-09-20_09-22-26
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 453
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.388682656817966
          entropy_coeff: 0.009999999999999998
          kl: 0.01195347111753667
          policy_loss: -0.049484156693021454
          total_loss: -0.07305189081364208
          vf_explained_var: -0.6841434836387634
          vf_loss: 0.0003190926534039641
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000
  iterations_since_restore: 451
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,451,4921.91,451000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-09-20_09-22-35
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 454
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3048805210325454
          entropy_coeff: 0.009999999999999998
          kl: 0.015102858598513237
          policy_loss: 0.0006448766630556849
          total_loss: -0.021824907697737216
          vf_explained_var: -0.546601414680481
          vf_loss: 0.0005790211499364281
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  iterations_since_restore: 452
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,452,4931.44,452000,0,0,0,995.91


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-09-20_09-22-45
  done: false
  episode_len_mean: 995.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 455
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.3656553294923572
          entropy_coeff: 0.009999999999999998
          kl: 0.012919769279380777
          policy_loss: -0.10339580968850189
          total_loss: -0.12659327576143875
          vf_explained_var: -0.36324769258499146
          vf_loss: 0.0004590850171451974
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000
  iterations_since_restore: 453
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,453,4941.3,453000,0,0,0,995.91




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-09-20_09-23-12
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 456
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.259736919403076
          entropy_coeff: 0.009999999999999998
          kl: 0.012451699654216379
          policy_loss: -0.031916112090564434
          total_loss: -0.05416588160312838
          vf_explained_var: -0.48556509613990784
          vf_loss: 0.0003475989650824987
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  iterations_since_restore: 454
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,454,4968.04,454000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-09-20_09-23-22
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 457
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.2058662705951266
          entropy_coeff: 0.009999999999999998
          kl: 0.009511522100241561
          policy_loss: -0.03613249427742428
          total_loss: -0.05774686526921061
          vf_explained_var: -1.0
          vf_loss: 0.0004442907897909107
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  iterations_since_restore: 455
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,455,4977.49,455000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-09-20_09-23-31
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 458
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.242859817875756
          entropy_coeff: 0.009999999999999998
          kl: 0.008730028994885923
          policy_loss: -0.010526755193455351
          total_loss: -0.032601955926252736
          vf_explained_var: -0.3887447416782379
          vf_loss: 0.000353395647562138
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
  iterations_since_restore: 456
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,456,4986.59,456000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-09-20_09-23-40
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 459
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.215830541981591
          entropy_coeff: 0.009999999999999998
          kl: 0.013998886139311455
          policy_loss: -0.037177417344517176
          total_loss: -0.05893782203396161
          vf_explained_var: -1.0
          vf_loss: 0.00039789953548784575
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 457000
  iterations_since_restore: 457
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,457,4996.06,457000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-09-20_09-23-50
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 460
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.0463319460550946
          entropy_coeff: 0.009999999999999998
          kl: 0.011247222517476955
          policy_loss: -0.01344573615739743
          total_loss: -0.03336643133726385
          vf_explained_var: -0.49064725637435913
          vf_loss: 0.0005426224044640549
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000
  iterations_since_restore: 458
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,458,5005.94,458000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-09-20_09-24-00
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 461
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9565904921955533
          entropy_coeff: 0.009999999999999998
          kl: 0.011919071832220693
          policy_loss: -0.003655697074201372
          total_loss: -0.022593611851334572
          vf_explained_var: -1.0
          vf_loss: 0.0006279861800269121
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000
  iterations_since_restore: 459
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,459,5015.74,459000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-09-20_09-24-10
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 462
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6827359331978693
          entropy_coeff: 0.009999999999999998
          kl: 0.01552720478268582
          policy_loss: 0.026513351582818562
          total_loss: 0.010561253668533431
          vf_explained_var: -1.0
          vf_loss: 0.0008752631217551728
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
  iterations_since_restore: 460
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,460,5025.65,460000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-09-20_09-24-20
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 463
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8010846787028842
          entropy_coeff: 0.009999999999999998
          kl: 0.011761632729729498
          policy_loss: 0.004497931152582168
          total_loss: -0.012713662617736392
          vf_explained_var: -0.6172215938568115
          vf_loss: 0.0007992538972757757
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000
  iterations_since_restore: 461
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,461,5035.61,461000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-09-20_09-24-30
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 464
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.5617087496651543
          entropy_coeff: 0.009999999999999998
          kl: 0.008638206374475023
          policy_loss: 0.024909722846415308
          total_loss: 0.010237038259704907
          vf_explained_var: -1.0
          vf_loss: 0.0009444002120289951
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 462000
  iterations_since_restore: 462
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,462,5045.6,462000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-09-20_09-24-40
  done: false
  episode_len_mean: 994.53
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 465
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6516528407732645
          entropy_coeff: 0.009999999999999998
          kl: 0.00920619789485465
          policy_loss: -0.07727794936961598
          total_loss: -0.09296866191758049
          vf_explained_var: -1.0
          vf_loss: 0.0008258154842122976
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 463000
  iterations_since_restore: 463
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,463,5055.65,463000,0,0,0,994.53


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-09-20_09-24-50
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 466
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.9257187472449409
          entropy_coeff: 0.009999999999999998
          kl: 0.01258320549393495
          policy_loss: -0.05585459290693204
          total_loss: -0.0745204960513446
          vf_explained_var: -0.9717624187469482
          vf_loss: 0.0005912831121046717
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 464000
  iterations_since_restore: 464
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,464,5065.64,464000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-09-20_09-25-00
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 467
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.892923194832272
          entropy_coeff: 0.009999999999999998
          kl: 0.015336668037296913
          policy_loss: -0.045721357357170846
          total_loss: -0.06419985323316521
          vf_explained_var: -0.9979424476623535
          vf_loss: 0.000450734882744857
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465000
  iterations_since_restore: 465
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,465,5075.75,465000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-09-20_09-25-10
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 468
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.934077591366238
          entropy_coeff: 0.009999999999999998
          kl: 0.017083000432900283
          policy_loss: -0.043163849827316074
          total_loss: -0.06168154233859645
          vf_explained_var: -0.9979824423789978
          vf_loss: 0.0008230834670636492
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 466000
  iterations_since_restore: 466
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,466,5085.81,466000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-09-20_09-25-20
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 469
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.8162436657481724
          entropy_coeff: 0.009999999999999998
          kl: 0.013868368925198125
          policy_loss: 0.005949488346878853
          total_loss: -0.011680571646947
          vf_explained_var: -1.0
          vf_loss: 0.0005323782931858053
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467000
  iterations_since_restore: 467
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,467,5095.82,467000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-09-20_09-25-30
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 470
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.077725999885135
          entropy_coeff: 0.009999999999999998
          kl: 0.014453238954064633
          policy_loss: 0.0040002857645352686
          total_loss: -0.016187688211599986
          vf_explained_var: -0.35351210832595825
          vf_loss: 0.0005892849504663092
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 468000
  iterations_since_restore: 468
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,468,5105.98,468000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-09-20_09-25-40
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 471
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.933920509285397
          entropy_coeff: 0.009999999999999998
          kl: 0.011371345654184259
          policy_loss: -0.029654191020462246
          total_loss: -0.048557238239381045
          vf_explained_var: -0.9963827133178711
          vf_loss: 0.00043615883041638883
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000
  iterations_since_restore: 469
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,469,5116.03,469000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-09-20_09-25-51
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 472
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.944882082939148
          entropy_coeff: 0.009999999999999998
          kl: 0.015703349876049995
          policy_loss: 0.04583418005042606
          total_loss: 0.026818938387764824
          vf_explained_var: -0.5446791052818298
          vf_loss: 0.0004335815573641513
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000
  iterations_since_restore: 470
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,470,5126.21,470000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-09-20_09-26-01
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 473
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.905033704969618
          entropy_coeff: 0.009999999999999998
          kl: 0.00890186540449061
          policy_loss: -0.004585054020086924
          total_loss: -0.02316024870508247
          vf_explained_var: -0.6371240615844727
          vf_loss: 0.0004751376028353762
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 471000
  iterations_since_restore: 471
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,471,5136.28,471000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-09-20_09-26-11
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 474
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.802848419878218
          entropy_coeff: 0.009999999999999998
          kl: 0.013480005541406994
          policy_loss: -0.027982462611463334
          total_loss: -0.04541212846007612
          vf_explained_var: -0.9169938564300537
          vf_loss: 0.0005988165933457721
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 472000
  iterations_since_restore: 472
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,472,5146.33,472000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-09-20_09-26-21
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 475
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7251477784580656
          entropy_coeff: 0.009999999999999998
          kl: 0.01106804888727295
          policy_loss: 0.002729059424665239
          total_loss: -0.014118910332520803
          vf_explained_var: -1.0
          vf_loss: 0.0004035097778897681
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000
  iterations_since_restore: 473
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,473,5156.37,473000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-09-20_09-26-31
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 476
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7805971609221565
          entropy_coeff: 0.009999999999999998
          kl: 0.021569777871498868
          policy_loss: -0.0627669180639916
          total_loss: -0.08025646495322386
          vf_explained_var: -0.6695361733436584
          vf_loss: 0.0003164243790201403
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000
  iterations_since_restore: 474
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,474,5166.38,474000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-09-20_09-26-41
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 477
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0195511634947252e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7631557292408413
          entropy_coeff: 0.009999999999999998
          kl: 0.013932146677234374
          policy_loss: -0.03940409264630741
          total_loss: -0.05669055986735556
          vf_explained_var: -1.0
          vf_loss: 0.0003450917254667729
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  iterations_since_restore: 475
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,475,5176.42,475000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-09-20_09-26-51
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 478
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0195511634947252e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6493221256468031
          entropy_coeff: 0.009999999999999998
          kl: 0.023531216210221452
          policy_loss: -0.04396275199121899
          total_loss: -0.060012577805254194
          vf_explained_var: -0.4020581841468811
          vf_loss: 0.00044339323552170147
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476000
  iterations_since_restore: 476
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,476,5186.46,476000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-09-20_09-27-01
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 479
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5361509746975368
          entropy_coeff: 0.009999999999999998
          kl: 0.01306769425534237
          policy_loss: -0.007808530703186989
          total_loss: -0.022628798625535435
          vf_explained_var: -0.6442742943763733
          vf_loss: 0.0005412418603858289
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 477000
  iterations_since_restore: 477
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,477,5196.51,477000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-09-20_09-27-11
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 480
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5289128224054973
          entropy_coeff: 0.009999999999999998
          kl: 0.010428096054684961
          policy_loss: -0.042887321818206044
          total_loss: -0.0576901132447852
          vf_explained_var: -1.0
          vf_loss: 0.000486337648342467
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 478000
  iterations_since_restore: 478
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,478,5206.38,478000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-09-20_09-27-21
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 481
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5019888308313158
          entropy_coeff: 0.009999999999999998
          kl: 0.0097515820177757
          policy_loss: -0.07938576348953777
          total_loss: -0.09401956146789922
          vf_explained_var: -0.6705144643783569
          vf_loss: 0.00038608935315601734
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
  iterations_since_restore: 479
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,479,5216.35,479000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-09-20_09-27-31
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 482
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7106860505210029
          entropy_coeff: 0.009999999999999998
          kl: 0.017212884254770543
          policy_loss: -0.029388487918509378
          total_loss: -0.046075493469834326
          vf_explained_var: -0.7144879698753357
          vf_loss: 0.0004198533217681365
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 480000
  iterations_since_restore: 480
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,480,5226.37,480000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-09-20_09-27-41
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 483
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5050791263580323
          entropy_coeff: 0.009999999999999998
          kl: 0.01321849673413785
          policy_loss: -0.07455893572833804
          total_loss: -0.08914613674084346
          vf_explained_var: -0.9881923198699951
          vf_loss: 0.0004635915393009782
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 481000
  iterations_since_restore: 481
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,481,5236.27,481000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-09-20_09-27-51
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 484
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.647081572479672
          entropy_coeff: 0.009999999999999998
          kl: 0.013362733620186872
          policy_loss: -0.008098665955993864
          total_loss: -0.024178038040796917
          vf_explained_var: -0.6823546886444092
          vf_loss: 0.0003914407920092344
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 482000
  iterations_since_restore: 482
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,482,5246.36,482000,0,0,0,995.87


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-09-20_09-28-01
  done: false
  episode_len_mean: 995.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 485
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6946034722858005
          entropy_coeff: 0.009999999999999998
          kl: 0.009633218054999778
          policy_loss: 0.054920496584640606
          total_loss: 0.03847826792755061
          vf_explained_var: -0.6909838914871216
          vf_loss: 0.0005038056771607242
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 483000
  iterations_since_restore: 483
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,483,5256.36,483000,0,0,0,995.87




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-09-20_09-28-29
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 486
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6549524439705743
          entropy_coeff: 0.009999999999999998
          kl: 0.014259250210196504
          policy_loss: -0.04051943802171283
          total_loss: -0.05659324843436479
          vf_explained_var: -0.5792539119720459
          vf_loss: 0.00047571294675839856
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484000
  iterations_since_restore: 484
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,484,5283.95,484000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-09-20_09-28-39
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 487
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7315335591634116
          entropy_coeff: 0.009999999999999998
          kl: 0.007213116864365788
          policy_loss: -0.07842130503720707
          total_loss: -0.09531787729097738
          vf_explained_var: -0.7817779779434204
          vf_loss: 0.00041876235821594794
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485000
  iterations_since_restore: 485
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,485,5293.92,485000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-09-20_09-28-49
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 488
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6044758492045932
          entropy_coeff: 0.009999999999999998
          kl: 0.008668197586590163
          policy_loss: -0.001070071632663409
          total_loss: -0.016530112591054705
          vf_explained_var: -0.7608753442764282
          vf_loss: 0.0005847178852289087
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486000
  iterations_since_restore: 486
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,486,5303.78,486000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-09-20_09-28-58
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 489
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4547195348474715
          entropy_coeff: 0.009999999999999998
          kl: 0.012708920445717033
          policy_loss: 0.012241434802611669
          total_loss: -0.001934227099021276
          vf_explained_var: -0.8497382402420044
          vf_loss: 0.0003715325814684749
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487000
  iterations_since_restore: 487
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,487,5313.66,487000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-09-20_09-29-08
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 490
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.653827542728848
          entropy_coeff: 0.009999999999999998
          kl: 0.01727756781854792
          policy_loss: -0.09177121197183927
          total_loss: -0.10800680820312765
          vf_explained_var: -0.6452397108078003
          vf_loss: 0.0003026800471060495
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  iterations_since_restore: 488
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,488,5323.32,488000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-09-20_09-29-18
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 491
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4617874383926392
          entropy_coeff: 0.009999999999999998
          kl: 0.013130867264516234
          policy_loss: -0.012443531966871686
          total_loss: -0.026569155603647233
          vf_explained_var: -0.45342686772346497
          vf_loss: 0.0004922493752221473
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000
  iterations_since_restore: 489
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,489,5333.03,489000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-09-20_09-29-28
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 492
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.568265402317047
          entropy_coeff: 0.009999999999999998
          kl: 0.01652992091110228
          policy_loss: -0.056309168537457786
          total_loss: -0.07145534687572055
          vf_explained_var: -0.31749090552330017
          vf_loss: 0.0005364733570281209
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  iterations_since_restore: 490
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,490,5342.9,490000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-09-20_09-29-38
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 493
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5711463292439778
          entropy_coeff: 0.009999999999999998
          kl: 0.012408593810486302
          policy_loss: -0.006427999917003844
          total_loss: -0.02155305668711662
          vf_explained_var: -0.44675835967063904
          vf_loss: 0.0005864087883512386
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  iterations_since_restore: 491
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,491,5352.8,491000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-09-20_09-29-48
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 494
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6869897656970554
          entropy_coeff: 0.009999999999999998
          kl: 0.012479776045445905
          policy_loss: -0.0074359136323134106
          total_loss: -0.023907452821731567
          vf_explained_var: -1.0
          vf_loss: 0.0003983574414936205
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  iterations_since_restore: 492
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,492,5362.64,492000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-09-20_09-29-57
  done: false
  episode_len_mean: 994.36
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 495
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5921069741249085
          entropy_coeff: 0.009999999999999998
          kl: 0.017197808363138577
          policy_loss: 0.012053318487273323
          total_loss: -0.0030637984888421165
          vf_explained_var: -0.5344363451004028
          vf_loss: 0.0008039527061757528
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000
  iterations_since_restore: 493
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,493,5372.44,493000,0,0,0,994.36


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-09-20_09-30-07
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 496
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4744999647140502
          entropy_coeff: 0.009999999999999998
          kl: 0.009258614723842287
          policy_loss: -0.0001405870955851343
          total_loss: -0.014300698497229153
          vf_explained_var: -0.5082854628562927
          vf_loss: 0.0005848894168189468
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000
  iterations_since_restore: 494
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,494,5382.51,494000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-09-20_09-30-17
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 497
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6347027394506666
          entropy_coeff: 0.009999999999999998
          kl: 0.01984400967316707
          policy_loss: 0.001965460139844153
          total_loss: -0.013362510171201493
          vf_explained_var: -0.7603388428688049
          vf_loss: 0.001019056511318518
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 495000
  iterations_since_restore: 495
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,495,5392.38,495000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-09-20_09-30-27
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 498
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5816919194327461
          entropy_coeff: 0.009999999999999998
          kl: 0.012695351528617928
          policy_loss: -0.015245817746553156
          total_loss: -0.03043913058936596
          vf_explained_var: -0.6369250416755676
          vf_loss: 0.0006236043706950214
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 496000
  iterations_since_restore: 496
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,496,5402.19,496000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-09-20_09-30-37
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 499
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5427910725275675
          entropy_coeff: 0.009999999999999998
          kl: 0.017858496281681367
          policy_loss: 0.037488394934270114
          total_loss: 0.02251978673868709
          vf_explained_var: -0.6215006709098816
          vf_loss: 0.0004593013764760043
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000
  iterations_since_restore: 497
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,497,5412.08,497000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-09-20_09-30-47
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 500
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.732596680853102
          entropy_coeff: 0.009999999999999998
          kl: 0.009776743288154914
          policy_loss: -0.026673438772559167
          total_loss: -0.04312060620221827
          vf_explained_var: -0.9460955858230591
          vf_loss: 0.0008788001257926225
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 498000
  iterations_since_restore: 498
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,498,5422.09,498000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-09-20_09-30-57
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 501
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5927435649765862
          entropy_coeff: 0.009999999999999998
          kl: 0.015536762221639893
          policy_loss: -0.04435652411646313
          total_loss: -0.059596933217512235
          vf_explained_var: -1.0
          vf_loss: 0.0006870256051317685
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
  iterations_since_restore: 499
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,499,5431.79,499000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-09-20_09-31-07
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 502
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.9371164507336087
          entropy_coeff: 0.009999999999999998
          kl: 0.015447299311593376
          policy_loss: -0.03995449576112959
          total_loss: -0.05866869986057281
          vf_explained_var: -0.5348568558692932
          vf_loss: 0.0006569604205045229
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500000
  iterations_since_restore: 500
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,500,5442.06,500000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-09-20_09-31-18
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 503
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6818943791919285
          entropy_coeff: 0.009999999999999998
          kl: 0.014771592902090132
          policy_loss: -0.020652170934610897
          total_loss: -0.03716460205614567
          vf_explained_var: -0.1733488142490387
          vf_loss: 0.00030651225485295677
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 501000
  iterations_since_restore: 501
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,501,5452.42,501000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-09-20_09-31-28
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 504
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4767320262061225
          entropy_coeff: 0.009999999999999998
          kl: 0.011275887010103286
          policy_loss: -0.006295013758871291
          total_loss: -0.020615073790152868
          vf_explained_var: -0.9084663987159729
          vf_loss: 0.00044725895309562073
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000
  iterations_since_restore: 502
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,502,5462.72,502000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-09-20_09-31-38
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 505
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.8629927105373807
          entropy_coeff: 0.009999999999999998
          kl: 0.007479332141671414
          policy_loss: -0.020399030215210386
          total_loss: -0.03872802679737409
          vf_explained_var: -0.8670327663421631
          vf_loss: 0.00030093173425282455
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503000
  iterations_since_restore: 503
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,503,5473.22,503000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-09-20_09-31-49
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 506
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7064217858844333
          entropy_coeff: 0.009999999999999998
          kl: 0.019406288906903216
          policy_loss: -0.027086992147896026
          total_loss: -0.04384486228227615
          vf_explained_var: -0.6394553184509277
          vf_loss: 0.00030634692035770664
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504000
  iterations_since_restore: 504
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,504,5483.44,504000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-09-20_09-31-59
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 507
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7636322114202712
          entropy_coeff: 0.009999999999999998
          kl: 0.018415168416045843
          policy_loss: -0.12986082955160075
          total_loss: -0.14711832075069348
          vf_explained_var: -0.9961034059524536
          vf_loss: 0.00037882995481292405
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000
  iterations_since_restore: 505
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,505,5493.93,505000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-09-20_09-32-10
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 508
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.0159029603004455
          entropy_coeff: 0.009999999999999998
          kl: 0.013777586213073947
          policy_loss: 0.004927580720848508
          total_loss: -0.014672123061286079
          vf_explained_var: -0.502028226852417
          vf_loss: 0.0005593216835728122
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000
  iterations_since_restore: 506
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,506,5504.41,506000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-09-20_09-32-20
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 509
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.008553495672014
          entropy_coeff: 0.009999999999999998
          kl: 0.016694273627690315
          policy_loss: 0.03298587509327465
          total_loss: 0.013351456655396356
          vf_explained_var: -0.6966861486434937
          vf_loss: 0.0004511141646718089
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000
  iterations_since_restore: 507
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,507,5514.99,507000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-09-20_09-32-31
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 510
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6817149268256293
          entropy_coeff: 0.009999999999999998
          kl: 0.01681070107591817
          policy_loss: 0.01832950164874395
          total_loss: 0.0020534948342376285
          vf_explained_var: -0.13512089848518372
          vf_loss: 0.0005411417419155542
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000
  iterations_since_restore: 508
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,508,5525.49,508000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-09-20_09-32-41
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 511
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5293267452420883e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4063277436627282
          entropy_coeff: 0.009999999999999998
          kl: 0.023485064118752813
          policy_loss: -0.14196485098865297
          total_loss: -0.15567471235990524
          vf_explained_var: 0.12288360297679901
          vf_loss: 0.00035341617719192677
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000
  iterations_since_restore: 509
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,509,5535.81,509000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-09-20_09-32-51
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 512
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.293990117863131e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.649394200907813
          entropy_coeff: 0.009999999999999998
          kl: 0.008138222984469617
          policy_loss: -0.0492942977282736
          total_loss: -0.06537696876459652
          vf_explained_var: -0.7613017559051514
          vf_loss: 0.00041127143308080527
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000
  iterations_since_restore: 510
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,510,5545.99,510000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-09-20_09-33-02
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 513
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.293990117863131e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7181570172309875
          entropy_coeff: 0.009999999999999998
          kl: 0.014462423582675557
          policy_loss: -0.05935398894879553
          total_loss: -0.07616094847520193
          vf_explained_var: -0.3955064117908478
          vf_loss: 0.00037460701633891503
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511000
  iterations_since_restore: 511
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,511,5556.29,511000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-09-20_09-33-12
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 514
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.293990117863131e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.820177310042911
          entropy_coeff: 0.009999999999999998
          kl: 0.012151052681242823
          policy_loss: 0.00750099155637953
          total_loss: -0.010158007840315501
          vf_explained_var: -0.09459666907787323
          vf_loss: 0.0005427770652911729
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
  iterations_since_restore: 512
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,512,5566.8,512000,0,0,0,995.73


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-09-20_09-33-23
  done: false
  episode_len_mean: 995.73
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 515
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.293990117863131e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.8307423803541396
          entropy_coeff: 0.009999999999999998
          kl: 0.019276558432237245
          policy_loss: -0.0006210726996262868
          total_loss: -0.01829266490207778
          vf_explained_var: 0.08862311393022537
          vf_loss: 0.0006358334235200244
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000
  iterations_since_restore: 513
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,513,5577.23,513000,0,0,0,995.73




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-09-20_09-33-51
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 516
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.293990117863131e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2744330141279432
          entropy_coeff: 0.009999999999999998
          kl: 0.008154313372230743
          policy_loss: 0.01233999952673912
          total_loss: -0.00017782627708382077
          vf_explained_var: -0.5348114371299744
          vf_loss: 0.00022650501972141986
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 514000
  iterations_since_restore: 514
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,514,5605.75,514000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-09-20_09-34-02
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 517
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.293990117863131e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.516224686967002
          entropy_coeff: 0.009999999999999998
          kl: 0.012456725673577808
          policy_loss: -0.09554641818006833
          total_loss: -0.11032354864809248
          vf_explained_var: -0.25111913681030273
          vf_loss: 0.0003851165524489867
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515000
  iterations_since_restore: 515
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,515,5616.18,515000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-09-20_09-34-12
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 518
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.293990117863131e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4033665855725606
          entropy_coeff: 0.009999999999999998
          kl: 0.01845003799822962
          policy_loss: -0.03795247086220317
          total_loss: -0.05161098179717859
          vf_explained_var: 0.44704386591911316
          vf_loss: 0.00037515405662513025
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 516000
  iterations_since_restore: 516
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,516,5626.2,516000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-09-20_09-34-22
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 519
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.293990117863131e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.824060282442305
          entropy_coeff: 0.009999999999999998
          kl: 0.011834628690359948
          policy_loss: -0.08641264956030581
          total_loss: -0.1041807693325811
          vf_explained_var: -0.21760979294776917
          vf_loss: 0.00047248398607027616
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000
  iterations_since_restore: 517
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,517,5636.64,517000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-09-20_09-34-32
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 520
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.293990117863131e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3166317303975423
          entropy_coeff: 0.009999999999999998
          kl: 0.0483830928062081
          policy_loss: 0.08979354856742752
          total_loss: 0.07708512660529879
          vf_explained_var: -0.11406131088733673
          vf_loss: 0.0004578979349591666
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000
  iterations_since_restore: 518
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,518,5646.55,518000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-09-20_09-34-43
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 521
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4409851767946974e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7112243122524686
          entropy_coeff: 0.009999999999999998
          kl: 0.017090850780693105
          policy_loss: -0.06471010289258428
          total_loss: -0.08141226205560896
          vf_explained_var: -0.5841163396835327
          vf_loss: 0.0004100830643437803
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000
  iterations_since_restore: 519
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,519,5657.15,519000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-09-20_09-34-53
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 522
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4409851767946974e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.721665174431271
          entropy_coeff: 0.009999999999999998
          kl: 0.01460496366815794
          policy_loss: 0.02308639900551902
          total_loss: 0.0062247569362322485
          vf_explained_var: -0.2449001669883728
          vf_loss: 0.0003550080123305735
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000
  iterations_since_restore: 520
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,520,5667.33,520000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-09-20_09-35-04
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 523
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4409851767946974e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.789780514770084
          entropy_coeff: 0.009999999999999998
          kl: 0.013745413222324704
          policy_loss: -0.046956950177749
          total_loss: -0.06454777403010262
          vf_explained_var: -0.7374032139778137
          vf_loss: 0.0003069770566879823
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 521000
  iterations_since_restore: 521
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,521,5678.1,521000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-09-20_09-35-14
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 524
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4409851767946974e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7007594916555617
          entropy_coeff: 0.009999999999999998
          kl: 0.013470711330082649
          policy_loss: -0.0005066297948360443
          total_loss: -0.017254981936679945
          vf_explained_var: -0.8338697552680969
          vf_loss: 0.00025924251595925955
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000
  iterations_since_restore: 522
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,522,5688.7,522000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-09-20_09-35-25
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 525
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4409851767946974e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.755155372619629
          entropy_coeff: 0.009999999999999998
          kl: 0.014916294761707347
          policy_loss: 0.08674262596501245
          total_loss: 0.06950102121465736
          vf_explained_var: -0.44371071457862854
          vf_loss: 0.00030994884453118885
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 523000
  iterations_since_restore: 523
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,523,5699.48,523000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-09-20_09-35-36
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 526
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4409851767946974e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7369939062330457
          entropy_coeff: 0.009999999999999998
          kl: 0.010922096971461947
          policy_loss: 0.055234671549664605
          total_loss: 0.0380757932861646
          vf_explained_var: -0.04997142031788826
          vf_loss: 0.0002110650861545259
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524000
  iterations_since_restore: 524
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,524,5710.28,524000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-09-20_09-35-46
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 527
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4409851767946974e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.455030651887258
          entropy_coeff: 0.009999999999999998
          kl: 0.012113539279359426
          policy_loss: -0.01952913486295276
          total_loss: -0.0336103359858195
          vf_explained_var: -0.31534910202026367
          vf_loss: 0.0004691060567792091
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
  iterations_since_restore: 525
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,525,5720.71,525000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-09-20_09-35-58
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 528
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4409851767946974e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.8174761560228136
          entropy_coeff: 0.009999999999999998
          kl: 0.026236924782481946
          policy_loss: -0.025860002181596228
          total_loss: -0.043571420055296683
          vf_explained_var: -0.7690131068229675
          vf_loss: 0.0004633452555733836
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000
  iterations_since_restore: 526
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,526,5732.61,526000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-09-20_09-36-10
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 529
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1614777651920484e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.8033158858617147
          entropy_coeff: 0.009999999999999998
          kl: 0.012086879863492012
          policy_loss: 0.02713029028640853
          total_loss: 0.009732127851910062
          vf_explained_var: -0.37302398681640625
          vf_loss: 0.00063499551518665
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000
  iterations_since_restore: 527
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,527,5744.13,527000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-09-20_09-36-21
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 530
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1614777651920484e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.8396692633628846
          entropy_coeff: 0.009999999999999998
          kl: 0.012756249347907407
          policy_loss: -0.06930708272589578
          total_loss: -0.08745374066962136
          vf_explained_var: 0.4763726592063904
          vf_loss: 0.0002500351123873972
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000
  iterations_since_restore: 528
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,528,5755.55,528000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-09-20_09-36-32
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 531
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1614777651920484e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5735668725437588
          entropy_coeff: 0.009999999999999998
          kl: 0.03547799045243513
          policy_loss: -0.02324621267616749
          total_loss: -0.03844743462072479
          vf_explained_var: 0.16187423467636108
          vf_loss: 0.000534445383042718
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 529000
  iterations_since_restore: 529
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,529,5766.36,529000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-09-20_09-36-43
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 532
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.742216647788067e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6719449175728691
          entropy_coeff: 0.009999999999999998
          kl: 0.010481240984655532
          policy_loss: -0.029657909232709143
          total_loss: -0.0460732070936097
          vf_explained_var: -0.508866548538208
          vf_loss: 0.0003041514449351881
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000
  iterations_since_restore: 530
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,530,5777.18,530000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-09-20_09-36-54
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 533
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.742216647788067e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.733893174595303
          entropy_coeff: 0.009999999999999998
          kl: 0.013291893653796846
          policy_loss: 0.034200799299610986
          total_loss: 0.017252844365106688
          vf_explained_var: -1.0
          vf_loss: 0.0003909764591500991
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000
  iterations_since_restore: 531
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,531,5788.14,531000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-09-20_09-37-05
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 534
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.742216647788067e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7584427224265204
          entropy_coeff: 0.009999999999999998
          kl: 0.022079993267602464
          policy_loss: -0.034447061187691155
          total_loss: -0.05149232819676399
          vf_explained_var: -0.7939902544021606
          vf_loss: 0.000539159754892656
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
  iterations_since_restore: 532
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,532,5799.31,532000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-09-20_09-37-16
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 535
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1613324971682104e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.889826946788364
          entropy_coeff: 0.009999999999999998
          kl: 0.021431169470420584
          policy_loss: 0.01697747463153468
          total_loss: -0.0014415838238265779
          vf_explained_var: -0.7793727517127991
          vf_loss: 0.00047920737189189016
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
  iterations_since_restore: 533
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,533,5810.34,533000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-09-20_09-37-27
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 536
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7419987457523165e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8005528145366245
          entropy_coeff: 0.009999999999999998
          kl: 0.0208422932062081
          policy_loss: -0.08376469781829252
          total_loss: -0.10146849134729968
          vf_explained_var: -0.4976228177547455
          vf_loss: 0.0003017353083123453
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  iterations_since_restore: 534
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,534,5821.38,534000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-09-20_09-37-38
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 537
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.7203731642829048
          entropy_coeff: 0.009999999999999998
          kl: 0.01695822750650496
          policy_loss: -0.040853596147563724
          total_loss: -0.057736505278282696
          vf_explained_var: -0.5775948166847229
          vf_loss: 0.00032082261160313566
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  iterations_since_restore: 535
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,535,5832.5,535000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-09-20_09-37-49
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 538
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.9497089465459188
          entropy_coeff: 0.009999999999999998
          kl: 0.015481398047737778
          policy_loss: 0.03353211962514453
          total_loss: 0.014314390139447318
          vf_explained_var: -0.16035322844982147
          vf_loss: 0.00027935696562053636
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000
  iterations_since_restore: 536
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,536,5843.57,536000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-09-20_09-38-00
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 539
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8802519665824042
          entropy_coeff: 0.009999999999999998
          kl: 0.018039226841811463
          policy_loss: 0.01307272066672643
          total_loss: -0.005495048065980276
          vf_explained_var: -0.2869774401187897
          vf_loss: 0.00023475030369202917
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 537000
  iterations_since_restore: 537
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,537,5854.4,537000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-09-20_09-38-11
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 540
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.9637380454275344
          entropy_coeff: 0.009999999999999998
          kl: 0.015027174151984695
          policy_loss: 0.0652838862604565
          total_loss: 0.04599358547064993
          vf_explained_var: -0.36159271001815796
          vf_loss: 0.0003470764611847699
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 538000
  iterations_since_restore: 538
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,538,5865.5,538000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-09-20_09-38-22
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 541
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8791547457377116
          entropy_coeff: 0.009999999999999998
          kl: 0.01586210640706555
          policy_loss: -0.09110939494437642
          total_loss: -0.10958868894312117
          vf_explained_var: -0.342478483915329
          vf_loss: 0.00031225111953163935
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 539000
  iterations_since_restore: 539
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,539,5876.37,539000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-09-20_09-38-33
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 542
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.7805566840701632
          entropy_coeff: 0.009999999999999998
          kl: 0.01760120283474293
          policy_loss: 0.02707177698612213
          total_loss: 0.009837522192133798
          vf_explained_var: -0.42798516154289246
          vf_loss: 0.0005713084843591787
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 540000
  iterations_since_restore: 540
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,540,5887.46,540000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-09-20_09-38-45
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 543
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.7691873033841452
          entropy_coeff: 0.009999999999999998
          kl: 0.01281144103692695
          policy_loss: 0.01709805859459771
          total_loss: -0.00017973101801342433
          vf_explained_var: -0.538998544216156
          vf_loss: 0.00041408026105879496
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541000
  iterations_since_restore: 541
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,541,5898.9,541000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-09-20_09-38-57
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 544
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.62322678565979
          entropy_coeff: 0.009999999999999998
          kl: 0.01984798812388296
          policy_loss: -0.04217687555485301
          total_loss: -0.058087134030130175
          vf_explained_var: -1.0
          vf_loss: 0.0003220078634007627
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000
  iterations_since_restore: 542
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,542,5911.05,542000,0,0,0,995.71




Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-09-20_09-39-26
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 546
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.606622823079427
          entropy_coeff: 0.009999999999999998
          kl: 0.010630187248818304
          policy_loss: -0.08160747107532289
          total_loss: -0.09753550808462831
          vf_explained_var: -0.5860159397125244
          vf_loss: 0.0001381919152562558
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 543000
  iterations_since_restore: 543
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,543,5940.03,543000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-09-20_09-39-39
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 547
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8668488383293151
          entropy_coeff: 0.009999999999999998
          kl: 0.011243722870336568
          policy_loss: -0.0002521674045258098
          total_loss: -0.018669794851707087
          vf_explained_var: -0.03315213695168495
          vf_loss: 0.00025086043832642544
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000
  iterations_since_restore: 544
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,544,5952.77,544000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-09-20_09-39-50
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 548
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.9139534539646572
          entropy_coeff: 0.009999999999999998
          kl: 0.011841528407173247
          policy_loss: 0.057873949739668105
          total_loss: 0.0388746546374427
          vf_explained_var: -0.5891544222831726
          vf_loss: 0.00014024031996895145
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000
  iterations_since_restore: 545
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,545,5963.85,545000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-09-20_09-40-01
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 549
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.7366684238115946
          entropy_coeff: 0.009999999999999998
          kl: 0.014497370160237387
          policy_loss: 0.04473285612960656
          total_loss: 0.027470564221342406
          vf_explained_var: -0.997613251209259
          vf_loss: 0.0001043928915654154
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 546000
  iterations_since_restore: 546
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,546,5974.55,546000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-09-20_09-40-11
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 550
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.7497562898529901
          entropy_coeff: 0.009999999999999998
          kl: 0.010235401723821664
          policy_loss: 0.1569976195693016
          total_loss: 0.13958757428659332
          vf_explained_var: -0.3981219530105591
          vf_loss: 8.75151018084984e-05
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000
  iterations_since_restore: 547
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,547,5985.32,547000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-09-20_09-40-22
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 551
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8228864047262403
          entropy_coeff: 0.009999999999999998
          kl: 0.014687230547986355
          policy_loss: 0.020581128199895223
          total_loss: 0.0025708562797970244
          vf_explained_var: -0.4214443266391754
          vf_loss: 0.00021859308965051444
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 548000
  iterations_since_restore: 548
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,548,5996.29,548000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-09-20_09-40-34
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 552
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6129981186284726e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8525299059020148
          entropy_coeff: 0.009999999999999998
          kl: 0.03542004830947469
          policy_loss: -0.042590905560387506
          total_loss: -0.05978998003734483
          vf_explained_var: -0.45524710416793823
          vf_loss: 0.0013262202696447882
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 549000
  iterations_since_restore: 549
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,549,6007.43,549000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-09-20_09-40-45
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 553
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.91949717794271e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.9382588426272074
          entropy_coeff: 0.009999999999999998
          kl: 0.018258706905567912
          policy_loss: 0.054732538759708405
          total_loss: 0.03554486185312271
          vf_explained_var: -0.07418433576822281
          vf_loss: 0.0001949107161231546
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 550000
  iterations_since_restore: 550
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,550,6018.39,550000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-09-20_09-40-55
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 554
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.91949717794271e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.001694361368815
          entropy_coeff: 0.009999999999999998
          kl: 0.03790795744340277
          policy_loss: -0.08129692069358296
          total_loss: -0.10118562140398556
          vf_explained_var: -0.2663594186306
          vf_loss: 0.00012823245360777947
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 551000
  iterations_since_restore: 551
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,551,6029.2,551000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-09-20_09-41-07
  done: false
  episode_len_mean: 994.33
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 555
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.934646807776557
          entropy_coeff: 0.009999999999999998
          kl: 0.012819531105788763
          policy_loss: -0.0005844672313994831
          total_loss: -0.019719114609890512
          vf_explained_var: -0.575294554233551
          vf_loss: 0.0002118187194759634
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552000
  iterations_since_restore: 552
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,552,6040.5,552000,0,0,0,994.33


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-09-20_09-41-18
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 556
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8596319463517932
          entropy_coeff: 0.009999999999999998
          kl: 0.019310699345396483
          policy_loss: -0.0076688617467880246
          total_loss: -0.0257643005086316
          vf_explained_var: -0.6761561632156372
          vf_loss: 0.0005008752237901919
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553000
  iterations_since_restore: 553
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,553,6051.67,553000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-09-20_09-41-29
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 557
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8384420567088657
          entropy_coeff: 0.009999999999999998
          kl: 0.01389034146258492
          policy_loss: 0.02553042910165257
          total_loss: 0.0074436268872684905
          vf_explained_var: -0.685002863407135
          vf_loss: 0.00029761640034848825
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000
  iterations_since_restore: 554
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,554,6062.82,554000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-09-20_09-41-41
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 558
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.042949589093526
          entropy_coeff: 0.009999999999999998
          kl: 0.019596665868998404
          policy_loss: 0.11508881251017253
          total_loss: 0.0948163530892796
          vf_explained_var: -0.2996551990509033
          vf_loss: 0.00015702896170195245
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 555000
  iterations_since_restore: 555
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,555,6074.16,555000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-09-20_09-41-52
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 559
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.9053052902221679
          entropy_coeff: 0.009999999999999998
          kl: 0.012576074923019655
          policy_loss: 0.023574694825543297
          total_loss: 0.004779105219576094
          vf_explained_var: -0.8919441103935242
          vf_loss: 0.00025745641744126464
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 556000
  iterations_since_restore: 556
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,556,6085.27,556000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-09-20_09-42-03
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 560
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8288131872812907
          entropy_coeff: 0.009999999999999998
          kl: 0.01708975763897073
          policy_loss: 0.00212913379073143
          total_loss: -0.015711813254488838
          vf_explained_var: -0.873997151851654
          vf_loss: 0.0004471809129528184
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000
  iterations_since_restore: 557
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,557,6096.74,557000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-09-20_09-42-14
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 561
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8245607693990071
          entropy_coeff: 0.009999999999999998
          kl: 0.014301688414182341
          policy_loss: 0.08216993591437736
          total_loss: 0.06420100807315773
          vf_explained_var: -1.0
          vf_loss: 0.00027667499283173433
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 558000
  iterations_since_restore: 558
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,558,6108.05,558000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-09-20_09-42-26
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 562
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8216285096274483
          entropy_coeff: 0.009999999999999998
          kl: 0.015115944175682077
          policy_loss: -0.004565116763114929
          total_loss: -0.02244378150337272
          vf_explained_var: -1.0
          vf_loss: 0.0003376151963796777
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 559000
  iterations_since_restore: 559
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,559,6119.19,559000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-09-20_09-42-37
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 563
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8422652681668599
          entropy_coeff: 0.009999999999999998
          kl: 0.015334910057826607
          policy_loss: -0.027466909939216244
          total_loss: -0.045464713809390864
          vf_explained_var: -0.9988772869110107
          vf_loss: 0.00042484218574827536
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 560000
  iterations_since_restore: 560
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,560,6130.23,560000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-09-20_09-42-48
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 564
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.636758765909407
          entropy_coeff: 0.009999999999999998
          kl: 0.013452751794372376
          policy_loss: -0.02230395062102212
          total_loss: -0.03842282729844252
          vf_explained_var: -0.5877410173416138
          vf_loss: 0.0002487095616136988
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 561000
  iterations_since_restore: 561
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,561,6141.27,561000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-09-20_09-42-59
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 565
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.6357253988583882
          entropy_coeff: 0.009999999999999998
          kl: 0.019691870606193252
          policy_loss: 0.0472115026993884
          total_loss: 0.031019250634643766
          vf_explained_var: -0.6909124255180359
          vf_loss: 0.00016499659678730595
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562000
  iterations_since_restore: 562
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,562,6152.26,562000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-09-20_09-43-10
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 566
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.7644202378061082
          entropy_coeff: 0.009999999999999998
          kl: 0.01866033626612591
          policy_loss: 0.05850597317847941
          total_loss: 0.04130306939284007
          vf_explained_var: -0.20493032038211823
          vf_loss: 0.0004412929305949041
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563000
  iterations_since_restore: 563
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,563,6163.12,563000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-09-20_09-43-21
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 567
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.879245766914066e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8250151144133673
          entropy_coeff: 0.009999999999999998
          kl: 0.02076608074482006
          policy_loss: 0.003927288535568449
          total_loss: -0.01411140412092209
          vf_explained_var: -0.31205296516418457
          vf_loss: 0.00021145302008436476
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 564000
  iterations_since_restore: 564
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,564,6174.09,564000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-09-20_09-43-32
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 568
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.818868650371094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.0161198708746166
          entropy_coeff: 0.009999999999999998
          kl: 0.012921477399749411
          policy_loss: -0.02109611084063848
          total_loss: -0.04107950607107745
          vf_explained_var: -0.7835261821746826
          vf_loss: 0.00017779714081471966
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 565000
  iterations_since_restore: 565
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,565,6185.14,565000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-09-20_09-43-43
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 569
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.818868650371094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.0162943813535903
          entropy_coeff: 0.009999999999999998
          kl: 0.015576942203805188
          policy_loss: 0.012814555565516155
          total_loss: -0.0067603514840205515
          vf_explained_var: -0.6009588241577148
          vf_loss: 0.0005880305103750693
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 566000
  iterations_since_restore: 566
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,566,6196.08,566000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-09-20_09-43-55
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 570
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.818868650371094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.107930972841051
          entropy_coeff: 0.009999999999999998
          kl: 0.01663315617962324
          policy_loss: -0.010402882099151611
          total_loss: -0.03127944982714123
          vf_explained_var: -0.4427327513694763
          vf_loss: 0.00020273750027020773
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567000
  iterations_since_restore: 567
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,567,6207.91,567000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-09-20_09-44-06
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 571
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.818868650371094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.786157402727339
          entropy_coeff: 0.009999999999999998
          kl: 0.018501187957275414
          policy_loss: -0.024851637995905347
          total_loss: -0.04247980962196986
          vf_explained_var: -0.8551881313323975
          vf_loss: 0.00023339352968226496
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 568000
  iterations_since_restore: 568
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,568,6219.02,568000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-09-20_09-44-17
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 572
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.818868650371094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8727183394961886
          entropy_coeff: 0.009999999999999998
          kl: 0.018443763013880874
          policy_loss: -0.10049565368228489
          total_loss: -0.11898306707541148
          vf_explained_var: -0.7013654708862305
          vf_loss: 0.0002397601895305949
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000
  iterations_since_restore: 569
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,569,6230.48,569000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-09-20_09-44-28
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 573
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.818868650371094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.8338791211446126
          entropy_coeff: 0.009999999999999998
          kl: 0.015814754745605182
          policy_loss: 0.006299596445428
          total_loss: -0.011862144867579142
          vf_explained_var: -1.0
          vf_loss: 0.00017704120934164774
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570000
  iterations_since_restore: 570
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,570,6241.62,570000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-09-20_09-44-39
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 574
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.818868650371094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.9797335134612188
          entropy_coeff: 0.009999999999999998
          kl: 0.012517578366847618
          policy_loss: 0.08590640475352605
          total_loss: 0.06620685255361927
          vf_explained_var: -0.39608103036880493
          vf_loss: 9.777731752870345e-05
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000
  iterations_since_restore: 571
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,571,6252.24,571000,0,0,0,995.71


Result for PPO_my_env_c2935_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-09-20_09-44-49
  done: false
  episode_len_mean: 995.71
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 575
  experiment_id: 47489c5ff1344717988ec3ea9512dd6e
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.818868650371094e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.941737511422899
          entropy_coeff: 0.009999999999999998
          kl: 0.014499802969125931
          policy_loss: 0.020591356646683483
          total_loss: 0.0013539570073286692
          vf_explained_var: -1.0
          vf_loss: 0.00017996860875023736
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 572000
  iterations_since_restore: 572
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c2935_00000,RUNNING,192.168.1.100:491044,572,6262.68,572000,0,0,0,995.71


