In [1]:
#!pip3 install ray torch torchvision tabulate tensorboard
#!pip3 install 'ray[rllib]'
#!pip3 install ray

In [6]:
#!pip3 uninstall -y iglu && pip3 install git+https://github.com/iglu-contest/iglu.git
#!pip3 install 'ray[rllib]'
#!pip3 install wandb
#!pip3 install torch torchvision

In [1]:
import warnings
warnings.filterwarnings('ignore')

import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

from models import VisualEncoder
from train import *

In [2]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: a ``VisualEncoder`` trunk feeding an action head and a value head.

    ``forward`` returns the action-head output and caches the value-head output,
    which ``value_function`` then hands back to RLlib.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder and the two linear heads.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2``.
            action_space: Action space; ``action_space.n`` sets the width of the
                action head (``num_outputs`` is only forwarded to the base class).
            num_outputs: Forwarded to ``TorchModelV2``.
            model_config: Forwarded to ``TorchModelV2``.
            name: Forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512
        self.encoder = VisualEncoder(features_dim)
        # Optional warm start from saved encoder weights (currently disabled).
        #self.encoder.load_state_dict(
        #    torch.load("Visual Autoencoder weights and models/encoder_weigths.pth", map_location=torch.device('cpu'))
        #)
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Value estimates from the most recent forward(); None until forward() has run.
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action-head outputs and cache the value estimates.

        Args:
            input_dict: Mapping whose ``'obs'`` entry is a 4-D tensor
                (presumably NHWC images with values in 0-255 — TODO confirm
                against the environment wrapper).
            state: Passed through unchanged.
            seq_lens: Unused.

        Returns:
            ``(action_head_output, state)``.
        """
        # Tensor.cuda() / Tensor.to() return a new tensor rather than moving the
        # original in place, so the result must be kept. Target the device the
        # head weights currently live on, so the input always matches the module
        # wherever it has been placed.
        device = self.action_head.weight.device
        obs = input_dict['obs'].to(device)
        # Channels-last -> channels-first, then scale by 1/255.
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        features = self.encoder(obs)
        action = self.action_head(features)
        # The value head emits (batch, 1); drop the trailing axis before caching.
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimates cached by the last ``forward`` call.

        Raises:
            AssertionError: If ``forward`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [3]:
# Make MyModelClass selectable by name; the training config refers to it
# through its "custom_model" entry.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [4]:
import os

# Enumerate GPUs in PCI bus order and expose only device 0 to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

def env_creator(env_config):
    """Build the wrapped IGLU environment.

    Args:
        env_config: Optional mapping of overrides. Recognised keys:
            ``"max_steps"`` (default ``1000``) is passed to ``gym.make``;
            ``"task_preset"`` (default ``['C22']``) is passed to
            ``TaskSet(preset=...)``.
            ``None`` or an empty mapping reproduces the defaults.

    Returns:
        The environment wrapped in ``PovOnlyWrapper`` and then
        ``IgluActionWrapper``.
    """
    # Tolerate env_creator(None) as well as an empty config.
    overrides = env_config or {}
    max_steps = overrides.get("max_steps", 1000)
    task_preset = overrides.get("task_preset", ['C22'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = IgluActionWrapper(env)
    return env

from ray.tune.registry import register_env
# Register env_creator under the id "my_env", the value used for "env" in the
# training config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# PPO settings for the registered "my_env" environment and custom model.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": {
        # Custom model registered earlier under this name.
        "custom_model": "my_torch_model",
        # Extra kwargs forwarded to the model constructor.
        "custom_model_config": {},
    },
    "logger_config": {
        "wandb": {
            "project": "IGLU-Minecraft",
            "name": "PPO C22 not pretrained",
        },
    },
}

# No stop criterion is given, so the run continues until it is interrupted.
tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])

2021-09-17 10:30:48,371	INFO wandb.py:170 -- Already logged into W&B.
2021-09-17 10:30:48,386	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_52b7f_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)


[2m[36m(pid=1260)[0m 2021-09-17 10:30:52,488	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=1260)[0m 2021-09-17 10:30:52,488	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-09-17_10-32-01
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -1.0
  episode_reward_mean: -1.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8840800762176513
          entropy_coeff: 0.009999999999999998
          kl: 0.0052462765865284805
          policy_loss: 0.10100602741456693
          total_loss: 0.3638703312828309
          vf_explained_var: -0.24777162075042725
          vf_loss: 0.29065584903906305
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,1,62.3299,1000,-1,-1,-1,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-09-17_10-32-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -1.0
  episode_reward_mean: -1.5
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.869246580865648
          entropy_coeff: 0.009999999999999998
          kl: 0.011319737190402036
          policy_loss: -0.03667076254884402
          total_loss: 0.09371679541137483
          vf_explained_var: -0.03631535544991493
          vf_loss: 0.15681607491957644
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,2,73.7208,2000,-1.5,-1,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-09-17_10-32-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.0
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8521761867735123
          entropy_coeff: 0.009999999999999998
          kl: 0.010228719906339508
          policy_loss: -0.15951182527674568
          total_loss: -0.1851644312342008
          vf_explained_var: 0.005779870320111513
          vf_loss: 0.000823413884306016
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,3,85.2931,3000,-1,0,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-09-17_10-32-35
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.75
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 4
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.848890770806207
          entropy_coeff: 0.009999999999999998
          kl: 0.011207164392573866
          policy_loss: -0.13432129621505737
          total_loss: -0.16001200137866867
          vf_explained_var: -0.06027766317129135
          vf_loss: 0.0005567707907175645
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,4,96.2103,4000,-0.75,0,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-09-17_10-32-44
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 5
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.842140785853068
          entropy_coeff: 0.009999999999999998
          kl: 0.010743191855565807
          policy_loss: -0.17705184523430134
          total_loss: -0.20304970575703515
          vf_explained_var: -0.377010315656662
          vf_loss: 0.0002749076003561236
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,5,105.353,5000,-0.6,0,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-09-17_10-32-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8443995979097156
          entropy_coeff: 0.009999999999999998
          kl: 0.008190267362769493
          policy_loss: -0.15388307869434356
          total_loss: -0.18053890532917446
          vf_explained_var: -0.48836278915405273
          vf_loss: 0.000150114481706017
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,6,114.669,6000,-0.5,0,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-09-17_10-33-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.42857142857142855
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 7
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.847299607594808
          entropy_coeff: 0.009999999999999998
          kl: 0.008962885047440219
          policy_loss: -0.13267004895541404
          total_loss: -0.15923279730810058
          vf_explained_var: -0.30060911178588867
          vf_loss: 0.0001176707019112655
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,7,124.118,7000,-0.428571,0,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-09-17_10-33-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.375
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 8
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.851990720960829
          entropy_coeff: 0.009999999999999998
          kl: 0.00574726114064239
          policy_loss: -0.09691248519553078
          total_loss: -0.12419385214646657
          vf_explained_var: -0.5367058515548706
          vf_loss: 8.908802262036867e-05
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,8,133.883,8000,-0.375,0,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-09-17_10-33-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 9
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.836118952433268
          entropy_coeff: 0.009999999999999998
          kl: 0.010634361182345446
          policy_loss: -0.101078635868099
          total_loss: -0.1271533771107594
          vf_explained_var: -0.5536850690841675
          vf_loss: 0.00015957660200203666
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,9,144.384,9000,-0.333333,0,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-09-17_10-33-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 10
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.822963325182597
          entropy_coeff: 0.009999999999999998
          kl: 0.008276352381990865
          policy_loss: -0.1356684045659171
          total_loss: -0.16218406214482253
          vf_explained_var: -0.39555680751800537
          vf_loss: 5.870492162406056e-05
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,10,154.41,10000,-0.3,0,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-09-17_10-33-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2727272727272727
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 11
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8284642722871567
          entropy_coeff: 0.009999999999999998
          kl: 0.011423514236280501
          policy_loss: -0.1200908087607887
          total_loss: -0.14596797774235407
          vf_explained_var: -0.5722293853759766
          vf_loss: 0.0001227692067105737
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,11,165.748,11000,-0.272727,0,-2,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-09-17_10-33-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 12
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8150553676817154
          entropy_coeff: 0.009999999999999998
          kl: 0.011145365266840804
          policy_loss: -0.062455154872602885
          total_loss: 0.03384215591682328
          vf_explained_var: 0.2707104980945587
          vf_loss: 0.12221878661463657
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,12,176.235,12000,-0.5,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-09-17_10-34-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.46153846153846156
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 13
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.817117346657647
          entropy_coeff: 0.009999999999999998
          kl: 0.014412033919385382
          policy_loss: -0.12370517436001036
          total_loss: -0.14470642333229383
          vf_explained_var: 0.47035571932792664
          vf_loss: 0.004287515378867586
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,13,187.216,13000,-0.461538,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-09-17_10-34-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.42857142857142855
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 14
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7930385616090563
          entropy_coeff: 0.009999999999999998
          kl: 0.013853290038673935
          policy_loss: 0.006681902623838849
          total_loss: -0.01710263482398457
          vf_explained_var: -0.07106836140155792
          vf_loss: 0.001375188631290156
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,14,198.519,14000,-0.428571,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-09-17_10-34-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 15
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7728970766067507
          entropy_coeff: 0.009999999999999998
          kl: 0.01076944931168558
          policy_loss: -0.09502497704492675
          total_loss: -0.11989402166671223
          vf_explained_var: -0.7383517622947693
          vf_loss: 0.0007060359705873352
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,15,209.188,15000,-0.4,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-09-17_10-34-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.375
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 16
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7362308316760595
          entropy_coeff: 0.009999999999999998
          kl: 0.012558819948976898
          policy_loss: -0.13496170205374558
          total_loss: -0.15920395793186293
          vf_explained_var: -0.5044809579849243
          vf_loss: 0.0006082876795618277
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,16,220.003,16000,-0.375,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-09-17_10-34-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.35294117647058826
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 17
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.767164585325453
          entropy_coeff: 0.009999999999999998
          kl: 0.01052333860336061
          policy_loss: -0.15111948980225456
          total_loss: -0.17646339366005526
          vf_explained_var: -0.45454075932502747
          vf_loss: 0.00022307237320799483
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore: 17


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,17,230.853,17000,-0.352941,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-09-17_10-35-01
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 18
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7476710875829062
          entropy_coeff: 0.009999999999999998
          kl: 0.010077119616005594
          policy_loss: -0.14747609585109683
          total_loss: -0.17274808742933803
          vf_explained_var: -0.9603556990623474
          vf_loss: 0.0001892943987008443
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,18,241.711,18000,-0.333333,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-09-17_10-35-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3157894736842105
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 19
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.746685838699341
          entropy_coeff: 0.009999999999999998
          kl: 0.006519233985550018
          policy_loss: 0.012845331006166008
          total_loss: -0.013170587038621307
          vf_explained_var: -0.62337327003479
          vf_loss: 0.000147093769596217
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,19,252.55,19000,-0.315789,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-09-17_10-35-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 20
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7546134736802843
          entropy_coeff: 0.009999999999999998
          kl: 0.01031397364018846
          policy_loss: -0.00533918912212054
          total_loss: -0.030678332555625173
          vf_explained_var: -1.0
          vf_loss: 0.0001441958826035261
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 192.168.1.100
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,20,263.488,20000,-0.3,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-09-17_10-35-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 21
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7693344460593328
          entropy_coeff: 0.009999999999999998
          kl: 0.005695995630214319
          policy_loss: 0.06475834269076586
          total_loss: 0.0701124481856823
          vf_explained_var: -0.43163618445396423
          vf_loss: 0.031908249699821076
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,21,274.464,21000,-0.333333,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-09-17_10-35-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3181818181818182
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 22
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7132312297821044
          entropy_coeff: 0.009999999999999998
          kl: 0.01120488199058634
          policy_loss: -0.14683635292781724
          total_loss: -0.17121434915396902
          vf_explained_var: -1.0
          vf_loss: 0.0005133406058626457
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,22,285.346,22000,-0.318182,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-09-17_10-35-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.30434782608695654
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7076077567206487
          entropy_coeff: 0.009999999999999998
          kl: 0.02212218843692278
          policy_loss: 0.2982171153028806
          total_loss: 0.27605080786678526
          vf_explained_var: -0.38164883852005005
          vf_loss: 0.00048533261975131206
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,23,296.307,23000,-0.304348,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-09-17_10-36-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2916666666666667
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 24
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.6993717167112563
          entropy_coeff: 0.009999999999999998
          kl: 0.010183728406238539
          policy_loss: 0.07369025266832775
          total_loss: 0.055539330343405406
          vf_explained_var: -0.42849358916282654
          vf_loss: 0.005787677632583331
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,24,307.11,24000,-0.291667,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-09-17_10-36-17
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.28
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 25
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.7437744405534534
          entropy_coeff: 0.009999999999999998
          kl: 0.008650780012404349
          policy_loss: -0.11346637457609177
          total_loss: -0.13751179915335443
          vf_explained_var: -0.7788160443305969
          vf_loss: 0.0007970881264352809
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  node_ip: 192.168.1.100
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,25,318.029,25000,-0.28,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-09-17_10-36-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2692307692307692
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 26
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.7222952710257635
          entropy_coeff: 0.009999999999999998
          kl: 0.00856828213071547
          policy_loss: -0.12048743251297209
          total_loss: -0.14495802943905195
          vf_explained_var: -0.20939777791500092
          vf_loss: 0.00018186964589403943
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,26,328.77,26000,-0.269231,0,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-09-17_10-36-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.2222222222222222
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 27
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.7424676126903957
          entropy_coeff: 0.009999999999999998
          kl: 0.004435902934521886
          policy_loss: -0.005353554400304953
          total_loss: 0.051765538503726324
          vf_explained_var: -0.32179194688796997
          vf_loss: 0.08321300009394893
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,27,340.009,27000,-0.222222,1,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-09-17_10-36-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.21428571428571427
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.707646359337701
          entropy_coeff: 0.009999999999999998
          kl: 0.016211278401088127
          policy_loss: -0.11730006006028917
          total_loss: -0.14067160561680794
          vf_explained_var: -0.1460801512002945
          vf_loss: 0.0012732252446261958
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,28,350.741,28000,-0.214286,1,-3,1000


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-09-17_10-37-01
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.20689655172413793
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 29
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.647217321395874
          entropy_coeff: 0.009999999999999998
          kl: 0.012527423178069224
          policy_loss: -0.03499750913017326
          total_loss: -0.05901114799910122
          vf_explained_var: -0.32465294003486633
          vf_loss: 0.0005794191778275288
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,29,361.457,29000,-0.206897,1,-3,1000




Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-09-17_10-37-29
  done: false
  episode_len_mean: 996.0666666666667
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.2
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 30
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.62101477517022
          entropy_coeff: 0.009999999999999998
          kl: 0.009407828993379014
          policy_loss: -0.055977444267935224
          total_loss: -0.08014053271876441
          vf_explained_var: -0.5508862137794495
          vf_loss: 0.0006358831483844875
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,30,389.834,30000,-0.2,1,-3,996.067


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-09-17_10-37-42
  done: false
  episode_len_mean: 996.1935483870968
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.22580645161290322
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 31
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.6940680742263794
          entropy_coeff: 0.009999999999999998
          kl: 0.010137520556434697
          policy_loss: -0.10014041024777624
          total_loss: -0.044534631156259115
          vf_explained_var: 0.4268380105495453
          vf_loss: 0.08102583082185852
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore: 31
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,31,402.415,31000,-0.225806,1,-3,996.194


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-09-17_10-37-53
  done: false
  episode_len_mean: 996.3125
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.21875
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 32
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5866889423794217
          entropy_coeff: 0.009999999999999998
          kl: 0.014417272954489373
          policy_loss: -0.001454330732425054
          total_loss: -0.022088467743661667
          vf_explained_var: 0.39969635009765625
          vf_loss: 0.0030701598876880275
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,32,413.314,32000,-0.21875,1,-3,996.312


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-09-17_10-38-03
  done: false
  episode_len_mean: 996.4242424242424
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.18181818181818182
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 33
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.629647043016222
          entropy_coeff: 0.009999999999999998
          kl: 0.010482558475655858
          policy_loss: -0.05330499377515581
          total_loss: 0.031146783961190118
          vf_explained_var: 0.3829212486743927
          vf_loss: 0.10917586208217674
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_restore: 33
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,33,423.72,33000,-0.181818,1,-3,996.424


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-09-17_10-38-13
  done: false
  episode_len_mean: 996.5294117647059
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.11764705882352941
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 34
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.641906155480279
          entropy_coeff: 0.009999999999999998
          kl: 0.011535019437723362
          policy_loss: -0.08092019243372811
          total_loss: -0.055771290593677095
          vf_explained_var: 0.46582382917404175
          vf_loss: 0.04983770885608262
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 34
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,34,433.859,34000,-0.117647,2,-3,996.529


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-09-17_10-38-24
  done: false
  episode_len_mean: 996.6285714285714
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 35
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.629222053951687
          entropy_coeff: 0.009999999999999998
          kl: 0.012524586599379096
          policy_loss: -0.011046908878617817
          total_loss: 0.034887204443415004
          vf_explained_var: 0.17581544816493988
          vf_loss: 0.07034764254931361
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_restore: 35
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,35,444.445,35000,-0.2,2,-3,996.629


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-09-17_10-38-35
  done: false
  episode_len_mean: 996.7222222222222
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.3055555555555556
  episode_reward_min: -4.0
  episodes_this_iter: 1
  episodes_total: 36
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5996163421207004
          entropy_coeff: 0.009999999999999998
          kl: 0.011854038846491033
          policy_loss: -0.04438696660929256
          total_loss: 0.009155305557780796
          vf_explained_var: -0.01875266246497631
          vf_loss: 0.07776032790231208
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,36,454.951,36000,-0.305556,2,-4,996.722


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-09-17_10-38-45
  done: false
  episode_len_mean: 996.8108108108108
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.35135135135135137
  episode_reward_min: -4.0
  episodes_this_iter: 1
  episodes_total: 37
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5639727883868746
          entropy_coeff: 0.009999999999999998
          kl: 0.014034765491773439
          policy_loss: -0.10970805355658134
          total_loss: -0.08114768324626817
          vf_explained_var: 0.4222976565361023
          vf_loss: 0.052094885157162533
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 37
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,37,465.585,37000,-0.351351,2,-4,996.811


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-09-17_10-38-56
  done: false
  episode_len_mean: 996.8947368421053
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.5263157894736842
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 38
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5384414010577734
          entropy_coeff: 0.009999999999999998
          kl: 0.010245953513802508
          policy_loss: -0.01582064438197348
          total_loss: 0.04234045859840181
          vf_explained_var: 0.5629757642745972
          vf_loss: 0.08200862212106586
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_restore: 38
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,38,475.886,38000,-0.526316,2,-7,996.895


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-09-17_10-39-06
  done: false
  episode_len_mean: 996.974358974359
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.5384615384615384
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 39
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.554158565733168
          entropy_coeff: 0.009999999999999998
          kl: 0.011162503995865613
          policy_loss: -0.14033166848950915
          total_loss: -0.07968346464137237
          vf_explained_var: 0.2394406497478485
          vf_loss: 0.08451541273647713
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,39,486.163,39000,-0.538462,2,-7,996.974


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-09-17_10-39-16
  done: false
  episode_len_mean: 997.05
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.525
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 40
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.564166529973348
          entropy_coeff: 0.009999999999999998
          kl: 0.01078122720697686
          policy_loss: -0.0278818827536371
          total_loss: 0.045416447851392955
          vf_explained_var: 0.17903614044189453
          vf_loss: 0.09732281387680107
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,40,496.262,40000,-0.525,2,-7,997.05


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-09-17_10-39-26
  done: false
  episode_len_mean: 997.1219512195122
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.4634146341463415
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 41
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.720531023873223
          entropy_coeff: 0.009999999999999998
          kl: 0.006586936021463657
          policy_loss: -0.11690084636211395
          total_loss: -0.035568858600325055
          vf_explained_var: -0.12576617300510406
          vf_loss: 0.10754925724403519
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,41,506.677,41000,-0.463415,2,-7,997.122


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-09-17_10-39-37
  done: false
  episode_len_mean: 997.1904761904761
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.42857142857142855
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 42
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4650899675157336
          entropy_coeff: 0.009999999999999998
          kl: 0.014676139627915596
          policy_loss: -0.15752836792833275
          total_loss: -0.11239002119335863
          vf_explained_var: 0.5111238360404968
          vf_loss: 0.06758782452800208
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,42,517,42000,-0.428571,2,-7,997.19


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-09-17_10-39-47
  done: false
  episode_len_mean: 997.2558139534884
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.46511627906976744
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 43
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.490919746292962
          entropy_coeff: 0.009999999999999998
          kl: 0.013286573941282024
          policy_loss: 0.06578570728500684
          total_loss: 0.1564805943104956
          vf_explained_var: 0.366651713848114
          vf_loss: 0.11361110077963935
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,43,527.209,43000,-0.465116,2,-7,997.256


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-09-17_10-39-57
  done: false
  episode_len_mean: 997.3181818181819
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.45454545454545453
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 44
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5302426788542007
          entropy_coeff: 0.009999999999999998
          kl: 0.011318948511750736
          policy_loss: -0.08438720107078553
          total_loss: -0.06908097076747152
          vf_explained_var: 0.15983164310455322
          vf_loss: 0.03891081587514943
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 44
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,44,537.236,44000,-0.454545,2,-7,997.318


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-09-17_10-40-07
  done: false
  episode_len_mean: 997.3777777777777
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.4
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 45
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3687764326731364
          entropy_coeff: 0.009999999999999998
          kl: 0.01032283168559292
          policy_loss: -0.04681065926949183
          total_loss: -0.01192892889181773
          vf_explained_var: 0.14233936369419098
          vf_loss: 0.057021067845117714
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,45,547.497,45000,-0.4,2,-7,997.378


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-09-17_10-40-18
  done: false
  episode_len_mean: 997.4347826086956
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.391304347826087
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 46
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4245143360561796
          entropy_coeff: 0.009999999999999998
          kl: 0.010038466961559283
          policy_loss: -0.15087871270047293
          total_loss: -0.15645623654127122
          vf_explained_var: 0.3319392800331116
          vf_loss: 0.017161850662281115
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 46
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,46,557.814,46000,-0.391304,2,-7,997.435


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-09-17_10-40-28
  done: false
  episode_len_mean: 997.4893617021277
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.3829787234042553
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 47
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.49779405064053
          entropy_coeff: 0.009999999999999998
          kl: 0.009374480094662112
          policy_loss: -0.0769234781463941
          total_loss: -0.09596337427695592
          vf_explained_var: 0.4722324311733246
          vf_loss: 0.004531871881853375
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,47,568.066,47000,-0.382979,2,-7,997.489


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-09-17_10-40-38
  done: false
  episode_len_mean: 997.5416666666666
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 48
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.6545334974924724
          entropy_coeff: 0.009999999999999998
          kl: 0.008981364880902573
          policy_loss: -0.11860771372707354
          total_loss: -0.11210436044881741
          vf_explained_var: 0.04177277535200119
          vf_loss: 0.031701479360668194
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,48,578.281,48000,-0.333333,2,-7,997.542


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-09-17_10-40-49
  done: false
  episode_len_mean: 997.5918367346939
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.32653061224489793
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 49
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3218179278903537
          entropy_coeff: 0.009999999999999998
          kl: 0.010510435926563567
          policy_loss: -0.06427380152874523
          total_loss: -0.02275232093201743
          vf_explained_var: 0.30250316858291626
          vf_loss: 0.06316309761928601
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,49,588.628,49000,-0.326531,2,-7,997.592


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-09-17_10-40-59
  done: false
  episode_len_mean: 997.64
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.3
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 50
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4779651059044734
          entropy_coeff: 0.009999999999999998
          kl: 0.011396711592970576
          policy_loss: -0.0006720033784707387
          total_loss: 0.04179665330383513
          vf_explained_var: 0.15713050961494446
          vf_loss: 0.06553879893488354
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,50,598.738,50000,-0.3,2,-7,997.64


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-09-17_10-41-10
  done: false
  episode_len_mean: 997.6862745098039
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 51
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5494499550925362
          entropy_coeff: 0.009999999999999998
          kl: 0.008899433060046274
          policy_loss: 0.12052665998538335
          total_loss: 0.1489087101485994
          vf_explained_var: 0.10677681863307953
          vf_loss: 0.05254163747886196
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,51,609.882,51000,-0.333333,2,-7,997.686


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-09-17_10-41-20
  done: false
  episode_len_mean: 997.7307692307693
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.28846153846153844
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 52
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4439065880245634
          entropy_coeff: 0.009999999999999998
          kl: 0.01443944264651396
          policy_loss: -0.02311795447021723
          total_loss: -0.02690473049879074
          vf_explained_var: 0.30983540415763855
          vf_loss: 0.018486371548432443
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,52,619.979,52000,-0.288462,2,-7,997.731


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-09-17_10-41-31
  done: false
  episode_len_mean: 997.7735849056604
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2830188679245283
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 53
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3328451342052885
          entropy_coeff: 0.009999999999999998
          kl: 0.011412473811497043
          policy_loss: -0.11581057470498814
          total_loss: -0.0553324718028307
          vf_explained_var: 0.20151910185813904
          vf_loss: 0.08209468076109058
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_restore: 53
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,53,630.805,53000,-0.283019,2,-7,997.774


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-09-17_10-41-41
  done: false
  episode_len_mean: 997.8148148148148
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.24074074074074073
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 54
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3850217872195776
          entropy_coeff: 0.009999999999999998
          kl: 0.013800500925801663
          policy_loss: -0.1413688284655412
          total_loss: -0.13062608987092972
          vf_explained_var: -0.07942689955234528
          vf_loss: 0.03252288224434273
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 54
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,54,641.109,54000,-0.240741,2,-7,997.815


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-09-17_10-41-52
  done: false
  episode_len_mean: 997.8545454545455
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2545454545454545
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 55
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3668280204137164
          entropy_coeff: 0.009999999999999998
          kl: 0.010598838249482689
          policy_loss: -0.056968743768003254
          total_loss: -0.04604764497942394
          vf_explained_var: 0.2693183124065399
          vf_loss: 0.032999553431808534
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 55
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,55,652.056,55000,-0.254545,2,-7,997.855


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-09-17_10-42-03
  done: false
  episode_len_mean: 997.8928571428571
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.26785714285714285
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.48392399681939
          entropy_coeff: 0.009999999999999998
          kl: 0.012415379348915737
          policy_loss: -0.04175745679272546
          total_loss: 0.06948029200236003
          vf_explained_var: 0.19516532123088837
          vf_loss: 0.13421468320820068
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_restore: 56
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,56,662.974,56000,-0.267857,2,-7,997.893


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-09-17_10-42-13
  done: false
  episode_len_mean: 997.9298245614035
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.22807017543859648
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 57
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.1414334588580664
          entropy_coeff: 0.009999999999999998
          kl: 0.010941004071136387
          policy_loss: -0.06248415625757641
          total_loss: -0.07614764041370815
          vf_explained_var: 0.32292431592941284
          vf_loss: 0.006109700597719186
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 57
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,57,673.057,57000,-0.22807,2,-7,997.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-09-17_10-42-23
  done: false
  episode_len_mean: 997.9655172413793
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.22413793103448276
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 58
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.373874227205912
          entropy_coeff: 0.009999999999999998
          kl: 0.012579565048622302
          policy_loss: -0.038547158965633974
          total_loss: -0.04740183481739627
          vf_explained_var: -0.07602822035551071
          vf_loss: 0.01299713148166322
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,58,682.949,58000,-0.224138,2,-7,997.966


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-09-17_10-42-34
  done: false
  episode_len_mean: 998.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.1864406779661017
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 59
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3864356888665093
          entropy_coeff: 0.009999999999999998
          kl: 0.010687954091639121
          policy_loss: 0.005410085287359026
          total_loss: 0.03975870112578074
          vf_explained_var: 0.05668140947818756
          vf_loss: 0.056609778517546755
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,59,693.627,59000,-0.186441,2,-7,998




Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-09-17_10-43-02
  done: false
  episode_len_mean: 995.8166666666667
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.2
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 60
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4168018129136826
          entropy_coeff: 0.009999999999999998
          kl: 0.012004057346887266
          policy_loss: 0.025600745446152158
          total_loss: 0.046089148852560255
          vf_explained_var: 0.3261348605155945
          vf_loss: 0.042855810412826635
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,60,721.672,60000,-0.2,2,-7,995.817


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-09-17_10-43-14
  done: false
  episode_len_mean: 995.8852459016393
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.19672131147540983
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2946357594596014
          entropy_coeff: 0.009999999999999998
          kl: 0.010469586027075109
          policy_loss: -0.03337453847957982
          total_loss: -0.004544870720969306
          vf_explained_var: 0.3082904815673828
          vf_loss: 0.05020558778455274
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,61,733.453,61000,-0.196721,2,-7,995.885


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-09-17_10-43-24
  done: false
  episode_len_mean: 995.9516129032259
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.16129032258064516
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 62
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4332562817467585
          entropy_coeff: 0.009999999999999998
          kl: 0.010084593687165327
          policy_loss: -0.043327590947349864
          total_loss: -0.05409569340861506
          vf_explained_var: 0.4273887574672699
          vf_loss: 0.012051769038144913
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_restore: 62
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,62,743.417,62000,-0.16129,2,-7,995.952


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-09-17_10-43-34
  done: false
  episode_len_mean: 996.015873015873
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.15873015873015872
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 63
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.439508459303114
          entropy_coeff: 0.009999999999999998
          kl: 0.012414529289766147
          policy_loss: -0.04254682450555265
          total_loss: -0.00567689725301332
          vf_explained_var: -0.10028425604104996
          vf_loss: 0.05940283093497985
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 63
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,63,753.334,63000,-0.15873,2,-7,996.016


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-09-17_10-43-44
  done: false
  episode_len_mean: 996.078125
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.140625
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 64
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3564470026228164
          entropy_coeff: 0.009999999999999998
          kl: 0.013166878984405791
          policy_loss: -0.07590864702231354
          total_loss: -0.0715486420939366
          vf_explained_var: 0.05184777081012726
          vf_loss: 0.02594944151933305
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,64,763.283,64000,-0.140625,2,-7,996.078


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-09-17_10-43-54
  done: false
  episode_len_mean: 996.1384615384616
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.12307692307692308
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 65
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4249793847401935
          entropy_coeff: 0.009999999999999998
          kl: 0.009941728070175859
          policy_loss: -0.04708085921075609
          total_loss: 0.016345452517271042
          vf_explained_var: 0.12372888624668121
          vf_loss: 0.08618484565781223
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_restore: 65
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,65,773.369,65000,-0.123077,2,-7,996.138


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-09-17_10-44-04
  done: false
  episode_len_mean: 996.1969696969697
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.12121212121212122
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 66
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.367449786927965
          entropy_coeff: 0.009999999999999998
          kl: 0.009931535650307759
          policy_loss: -0.05255954319404231
          total_loss: 0.0062088001933362745
          vf_explained_var: -0.17082978785037994
          vf_loss: 0.08095310975331813
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,66,783.729,66000,-0.121212,2,-7,996.197


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-09-17_10-44-14
  done: false
  episode_len_mean: 996.2537313432836
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.08955223880597014
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 67
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.297956715689765
          entropy_coeff: 0.009999999999999998
          kl: 0.007875882770354564
          policy_loss: -0.06898931949916813
          total_loss: -0.035164543406830895
          vf_explained_var: 0.32574641704559326
          vf_loss: 0.05562295726785022
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_since_restore: 67
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,67,793.985,67000,-0.0895522,2,-7,996.254


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-09-17_10-44-24
  done: false
  episode_len_mean: 996.3088235294117
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.07352941176470588
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 68
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.294609785079956
          entropy_coeff: 0.009999999999999998
          kl: 0.010243520345651546
          policy_loss: 0.021260682286487684
          total_loss: 0.020371648255321714
          vf_explained_var: 0.17902611196041107
          vf_loss: 0.02052053528605029
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_since_restore: 68
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,68,803.831,68000,-0.0735294,2,-7,996.309


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-09-17_10-44-34
  done: false
  episode_len_mean: 996.3623188405797
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.043478260869565216
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 69
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.357323185602824
          entropy_coeff: 0.009999999999999998
          kl: 0.009308814917118881
          policy_loss: -0.0743248597615295
          total_loss: -0.042877941992547776
          vf_explained_var: -0.1221558004617691
          vf_loss: 0.053623827453702685
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_restore: 69
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,69,813.655,69000,-0.0434783,2,-7,996.362


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-09-17_10-44-44
  done: false
  episode_len_mean: 996.4142857142857
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.014285714285714285
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 70
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.410275032785204
          entropy_coeff: 0.009999999999999998
          kl: 0.012268806695484047
          policy_loss: -0.08731406090988053
          total_loss: -0.07874919225772221
          vf_explained_var: 0.021750381216406822
          vf_loss: 0.03082729946408007
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 70
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,70,823.529,70000,-0.0142857,2,-7,996.414


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-09-17_10-44-54
  done: false
  episode_len_mean: 996.4647887323944
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.014084507042253521
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 71
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.360171733962165
          entropy_coeff: 0.009999999999999998
          kl: 0.012015970365299486
          policy_loss: 0.007983896860645876
          total_loss: 0.03286338159814477
          vf_explained_var: -0.08095967769622803
          vf_loss: 0.046678806269644865
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_restore: 71
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,71,833.335,71000,-0.0140845,2,-7,996.465


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-09-17_10-45-04
  done: false
  episode_len_mean: 996.5138888888889
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.013888888888888888
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 72
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.420813563134935
          entropy_coeff: 0.009999999999999998
          kl: 0.011763631968460588
          policy_loss: 0.04392637444867028
          total_loss: 0.07818978826204935
          vf_explained_var: 0.48844417929649353
          vf_loss: 0.0567070041783154
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_restore: 72
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,72,843.16,72000,-0.0138889,2,-7,996.514


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-09-17_10-45-14
  done: false
  episode_len_mean: 996.5616438356165
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: -0.0136986301369863
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 73
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.392137516869439
          entropy_coeff: 0.009999999999999998
          kl: 0.011425808907407922
          policy_loss: -0.029292654991149903
          total_loss: 0.03308379318979052
          vf_explained_var: 0.5203356146812439
          vf_loss: 0.08458395024968518
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_restore: 73
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,73,853.223,73000,-0.0136986,2,-7,996.562


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-09-17_10-45-24
  done: false
  episode_len_mean: 996.6081081081081
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 74
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.270624793900384
          entropy_coeff: 0.009999999999999998
          kl: 0.0179000897045071
          policy_loss: 0.02533449874156051
          total_loss: 0.022161419772439534
          vf_explained_var: 0.506225049495697
          vf_loss: 0.016848153323452505
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  node_ip: 192.168.1.100
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,74,863.559,74000,0,2,-7,996.608


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-09-17_10-45-34
  done: false
  episode_len_mean: 996.6533333333333
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 75
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.215782637066311
          entropy_coeff: 0.009999999999999998
          kl: 0.01789628879284181
          policy_loss: 0.07499919235706329
          total_loss: 0.08006225095854866
          vf_explained_var: 0.31050804257392883
          vf_loss: 0.024536442151293157
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_restore: 75
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,75,873.566,75000,0,2,-7,996.653


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-09-17_10-45-45
  done: false
  episode_len_mean: 996.6973684210526
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 76
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.6038427220450506
          entropy_coeff: 0.009999999999999998
          kl: 0.01736908584001821
          policy_loss: -0.06709800362586975
          total_loss: -0.08860138257344564
          vf_explained_var: 0.684374213218689
          vf_loss: 0.0019296863895659853
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_restore: 76
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,76,884.182,76000,0,2,-7,996.697


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-09-17_10-45-56
  done: false
  episode_len_mean: 996.7402597402597
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 77
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.599806380271912
          entropy_coeff: 0.009999999999999998
          kl: 0.01972384091574896
          policy_loss: -0.14472035591801008
          total_loss: -0.16658620950248507
          vf_explained_var: 0.6456493139266968
          vf_loss: 0.0011736314109940496
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_restore: 77
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,77,894.923,77000,0,2,-7,996.74


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-09-17_10-46-06
  done: false
  episode_len_mean: 996.7820512820513
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 78
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.542955173386468
          entropy_coeff: 0.009999999999999998
          kl: 0.016385880903735935
          policy_loss: -0.1033018633723259
          total_loss: -0.1245409782561991
          vf_explained_var: 0.4286384880542755
          vf_loss: 0.0017325526349143022
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since_restore: 78
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,78,905.599,78000,0,2,-7,996.782


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-09-17_10-46-17
  done: false
  episode_len_mean: 996.8227848101266
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 79
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5739040427737767
          entropy_coeff: 0.009999999999999998
          kl: 0.013090370594560162
          policy_loss: -0.18316040829651886
          total_loss: -0.20584508627653123
          vf_explained_var: 0.46947330236434937
          vf_loss: 0.0010908072866085503
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_restore: 79
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,79,915.79,79000,0,2,-7,996.823


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-09-17_10-46-27
  done: false
  episode_len_mean: 996.8625
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 80
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.6398252964019777
          entropy_coeff: 0.009999999999999998
          kl: 0.009245974358187655
          policy_loss: -0.2893403660919931
          total_loss: -0.3138814643025398
          vf_explained_var: 0.16715297102928162
          vf_loss: 0.0004702585512455294
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,80,926.338,80000,0,2,-7,996.862


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-09-17_10-46-40
  done: false
  episode_len_mean: 996.9012345679013
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 81
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.5008334583706326
          entropy_coeff: 0.009999999999999998
          kl: 0.012026782716045641
          policy_loss: -0.23078171585996946
          total_loss: -0.25378804869121974
          vf_explained_var: 0.4901493787765503
          vf_loss: 0.00019798385263938042
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_restore: 81
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,81,938.867,81000,0,2,-7,996.901


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-09-17_10-46-51
  done: false
  episode_len_mean: 996.939024390244
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 82
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.4093432346979777
          entropy_coeff: 0.009999999999999998
          kl: 0.015973201852165397
          policy_loss: -0.15789620905286736
          total_loss: -0.17887452704211076
          vf_explained_var: 0.27124595642089844
          vf_loss: 0.0007191319209394148
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_restore: 82
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,82,950.301,82000,0,2,-7,996.939


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-09-17_10-47-02
  done: false
  episode_len_mean: 996.9759036144578
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 83
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.353852046860589
          entropy_coeff: 0.009999999999999998
          kl: 0.019581549888274423
          policy_loss: -0.17994934771623877
          total_loss: -0.20016179813279045
          vf_explained_var: -0.17352399230003357
          vf_loss: 0.0003888350137761639
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore: 83
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,83,961.248,83000,0,2,-7,996.976


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-09-17_10-47-13
  done: false
  episode_len_mean: 997.0119047619048
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 84
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3679659909672206
          entropy_coeff: 0.009999999999999998
          kl: 0.01973999585921743
          policy_loss: -0.07097924712838398
          total_loss: -0.09105422426429059
          vf_explained_var: -0.12262623012065887
          vf_loss: 0.0006436816382726344
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore: 84
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,84,972.122,84000,0,2,-7,997.012


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-09-17_10-47-24
  done: false
  episode_len_mean: 997.0470588235294
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 85
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.367321083280775
          entropy_coeff: 0.009999999999999998
          kl: 0.012979646354974535
          policy_loss: -0.11263329535722733
          total_loss: -0.13408797871735362
          vf_explained_var: -0.06647338718175888
          vf_loss: 0.00027158174925716593
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_restore: 85
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,85,982.752,85000,0,2,-7,997.047


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-09-17_10-47-34
  done: false
  episode_len_mean: 997.0813953488372
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 86
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.346440749698215
          entropy_coeff: 0.009999999999999998
          kl: 0.011979696214635377
          policy_loss: -0.05764516956276364
          total_loss: -0.07878578085866239
          vf_explained_var: -0.014683381654322147
          vf_loss: 0.000526839665932736
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_restore: 86
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,86,993.372,86000,0,2,-7,997.081


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-09-17_10-47-45
  done: false
  episode_len_mean: 997.1149425287356
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 87
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.1879469288720026
          entropy_coeff: 0.009999999999999998
          kl: 0.009357174099006944
          policy_loss: -0.08134630978521373
          total_loss: -0.10159911302228769
          vf_explained_var: -0.15252716839313507
          vf_loss: 0.00022308796582769396
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_restore: 87
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,87,1004.17,87000,0,2,-7,997.115


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-09-17_10-47-56
  done: false
  episode_len_mean: 997.1477272727273
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 88
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.9190550009409586
          entropy_coeff: 0.009999999999999998
          kl: 0.012058210714436551
          policy_loss: -0.19082942505677541
          total_loss: -0.208095454176267
          vf_explained_var: 0.15417936444282532
          vf_loss: 0.00011578657823621243
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,88,1015.05,88000,0,2,-7,997.148


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-09-17_10-48-07
  done: false
  episode_len_mean: 997.1797752808989
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 89
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.490321816338433
          entropy_coeff: 0.009999999999999998
          kl: 0.022199047076347473
          policy_loss: -0.009972866914338536
          total_loss: -0.031030077404446074
          vf_explained_var: -0.27720338106155396
          vf_loss: 0.0005161513394947785
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_restore: 89
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,89,1025.85,89000,0,2,-7,997.18




Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-09-17_10-48-35
  done: false
  episode_len_mean: 995.7333333333333
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 90
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.206837280591329
          entropy_coeff: 0.009999999999999998
          kl: 0.01397693007154825
          policy_loss: -0.16006374317738745
          total_loss: -0.17765876841213968
          vf_explained_var: -0.8684229254722595
          vf_loss: 0.0013285378245604484
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 90
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,90,1054.24,90000,0,2,-7,995.733


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-09-17_10-48-47
  done: false
  episode_len_mean: 995.7802197802198
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 91
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4451959901385836
          entropy_coeff: 0.009999999999999998
          kl: 0.015053046132017824
          policy_loss: 0.016313109464115568
          total_loss: -0.0027887991733021207
          vf_explained_var: -0.37111493945121765
          vf_loss: 0.001963112675649528
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_restore: 91
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,91,1066.34,91000,0,2,-7,995.78


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-09-17_10-48-58
  done: false
  episode_len_mean: 995.8260869565217
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 92
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.1737000624338787
          entropy_coeff: 0.009999999999999998
          kl: 0.014493160603834059
          policy_loss: 0.01656894056747357
          total_loss: 0.002765385475423601
          vf_explained_var: -0.8348501324653625
          vf_loss: 0.004672482071651353
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_since_restore: 92
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,92,1076.58,92000,0,2,-7,995.826


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-09-17_10-49-08
  done: false
  episode_len_mean: 995.8709677419355
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 93
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.5041871706644696
          entropy_coeff: 0.009999999999999998
          kl: 0.0156489899415909
          policy_loss: -0.017940240270561642
          total_loss: -0.03859554926554362
          vf_explained_var: 0.22899462282657623
          vf_loss: 0.0008655359405868997
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_restore: 93
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,93,1086.6,93000,0,2,-7,995.871


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-09-17_10-49-18
  done: false
  episode_len_mean: 995.9148936170212
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 94
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.47028079562717
          entropy_coeff: 0.009999999999999998
          kl: 0.011609878494244013
          policy_loss: -0.03845824628240532
          total_loss: -0.06015949663188722
          vf_explained_var: -0.7552850246429443
          vf_loss: 0.000389333875823973
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 94
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,94,1096.65,94000,0,2,-7,995.915


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-09-17_10-49-28
  done: false
  episode_len_mean: 995.9578947368421
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 95
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3567528247833254
          entropy_coeff: 0.009999999999999998
          kl: 0.013696210938150008
          policy_loss: -0.025338977865046925
          total_loss: -0.045410781043271224
          vf_explained_var: -0.03521681949496269
          vf_loss: 0.00041407672753040163
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_restore: 95
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,95,1106.62,95000,0,2,-7,995.958


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-09-17_10-49-38
  done: false
  episode_len_mean: 996.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 96
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.378947318924798
          entropy_coeff: 0.009999999999999998
          kl: 0.012522948391086608
          policy_loss: -0.10042374812894397
          total_loss: -0.12074660344256295
          vf_explained_var: -0.056306347250938416
          vf_loss: 0.0006489520932922864
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore: 96
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,96,1116.49,96000,0,2,-7,996


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-09-17_10-49-48
  done: false
  episode_len_mean: 996.0412371134021
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 97
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3278475761413575
          entropy_coeff: 0.009999999999999998
          kl: 0.015337523812663612
          policy_loss: -0.08606818922691875
          total_loss: -0.10528165093726582
          vf_explained_var: -0.3044115900993347
          vf_loss: 0.0006140718483948149
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_restore: 97
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,97,1126.41,97000,0,2,-7,996.041


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-09-17_10-49-58
  done: false
  episode_len_mean: 996.0816326530612
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 98
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3533231258392333
          entropy_coeff: 0.009999999999999998
          kl: 0.016892694009523333
          policy_loss: -0.11206815648410055
          total_loss: -0.13129728767606946
          vf_explained_var: -0.4673505127429962
          vf_loss: 0.0005032444775376158
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_restore: 98
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,98,1136.41,98000,0,2,-7,996.082


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-09-17_10-50-08
  done: false
  episode_len_mean: 996.1212121212121
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 99
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.3851333459218345
          entropy_coeff: 0.009999999999999998
          kl: 0.0146051223591065
          policy_loss: 0.025760224295987025
          total_loss: 0.005714321053690381
          vf_explained_var: -0.6313683390617371
          vf_loss: 0.0005192784621613101
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_restore: 99
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,99,1146.67,99000,0,2,-7,996.121


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-09-17_10-50-18
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.0
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 100
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.33506817817688
          entropy_coeff: 0.009999999999999998
          kl: 0.014819972101785123
          policy_loss: -0.019141521263453695
          total_loss: -0.03849135459297233
          vf_explained_var: -0.4793490469455719
          vf_loss: 0.0006663544664560403
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,100,1156.74,100000,0,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-09-17_10-50-27
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.01
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 101
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.6385541094674005
          entropy_coeff: 0.009999999999999998
          kl: 0.014032478681066054
          policy_loss: -0.03385034576058388
          total_loss: -0.054990567887822785
          vf_explained_var: -0.562504231929779
          vf_loss: 0.0020880094500373364
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,101,1166.13,101000,0.01,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-09-17_10-50-37
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 102
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.228032620747884
          entropy_coeff: 0.009999999999999998
          kl: 0.01257099407833187
          policy_loss: -0.004417854299147924
          total_loss: -0.023405158354176416
          vf_explained_var: -0.21762360632419586
          vf_loss: 0.000464551295994574
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,102,1176.12,102000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-09-17_10-50-48
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 103
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2439244402779472
          entropy_coeff: 0.009999999999999998
          kl: 0.013141881351012448
          policy_loss: -0.10267029624018405
          total_loss: -0.12186826144655545
          vf_explained_var: -0.2385166436433792
          vf_loss: 0.00028435247950255873
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,103,1186.17,103000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-09-17_10-50-58
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 104
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.329834474457635
          entropy_coeff: 0.009999999999999998
          kl: 0.011376181083482904
          policy_loss: -0.0075157697002093
          total_loss: -0.027839519249068367
          vf_explained_var: -0.2900114357471466
          vf_loss: 0.0004149547651953374
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,104,1196.24,104000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-09-17_10-51-08
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 105
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.234820583131578
          entropy_coeff: 0.009999999999999998
          kl: 0.012359977107048791
          policy_loss: 0.11091310431559881
          total_loss: 0.09158619286285506
          vf_explained_var: -0.17708495259284973
          vf_loss: 0.00024029945917492215
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,105,1206.49,105000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-09-17_10-51-18
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 106
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4684633678860135
          entropy_coeff: 0.009999999999999998
          kl: 0.01291723030767865
          policy_loss: 0.04956422108742926
          total_loss: 0.028185729185740152
          vf_explained_var: -0.044950131326913834
          vf_loss: 0.00039976513192717297
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,106,1216.65,106000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-09-17_10-51-27
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 107
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.616463836034139
          entropy_coeff: 0.009999999999999998
          kl: 0.008815318944531587
          policy_loss: -0.08914613330529796
          total_loss: -0.11303085654249621
          vf_explained_var: 0.04435562714934349
          vf_loss: 0.00029646953058646533
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,107,1226.06,107000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-09-17_10-51-37
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 108
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.6795962466133965
          entropy_coeff: 0.009999999999999998
          kl: 0.0065019596031877964
          policy_loss: -0.0355247702035639
          total_loss: -0.06075565762196978
          vf_explained_var: 0.14673630893230438
          vf_loss: 0.00010213437617999767
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,108,1235.29,108000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-09-17_10-51-48
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 109
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2705948564741347
          entropy_coeff: 0.009999999999999998
          kl: 0.013903879027018777
          policy_loss: -0.06654291972517967
          total_loss: -0.08534948122170237
          vf_explained_var: -0.2536120116710663
          vf_loss: 0.0007710111809299431
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,109,1246.2,109000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-09-17_10-51-58
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 110
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4908366998036704
          entropy_coeff: 0.009999999999999998
          kl: 0.011678733478151947
          policy_loss: 0.006224148803287082
          total_loss: -0.015637447685003282
          vf_explained_var: 0.1255892813205719
          vf_loss: 0.0004190534317785932
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,110,1256.35,110000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-09-17_10-52-07
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 111
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.670605527030097
          entropy_coeff: 0.009999999999999998
          kl: 0.012008547240777764
          policy_loss: -0.03354993354943064
          total_loss: -0.057421627640724185
          vf_explained_var: -0.35138005018234253
          vf_loss: 0.00013243623487445196
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,111,1265.85,111000,0.03,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-09-17_10-52-17
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 112
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.598152420255873
          entropy_coeff: 0.009999999999999998
          kl: 0.011835312859313676
          policy_loss: 0.03562541815141837
          total_loss: 0.012424240840805902
          vf_explained_var: -0.24544309079647064
          vf_loss: 0.00011739909999353889
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,112,1275.71,112000,0.06,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-09-17_10-52-27
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 113
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.511222653918796
          entropy_coeff: 0.009999999999999998
          kl: 0.014354732952689315
          policy_loss: 0.06534520942303869
          total_loss: 0.04440303337242868
          vf_explained_var: -0.478215754032135
          vf_loss: 0.0009402365784303078
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,113,1285.55,113000,0.06,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-09-17_10-52-37
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 114
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.635335593753391
          entropy_coeff: 0.009999999999999998
          kl: 0.01704577231767084
          policy_loss: -0.03616515166229672
          total_loss: -0.05830503288242552
          vf_explained_var: 0.2082584798336029
          vf_loss: 0.00037817718865779977
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,114,1295.16,114000,0.06,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-09-17_10-52-48
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 115
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4307548734876843
          entropy_coeff: 0.009999999999999998
          kl: 0.014512968353878808
          policy_loss: -0.04303381654123465
          total_loss: -0.06314179938700464
          vf_explained_var: -0.07781492918729782
          vf_loss: 0.000934146051036401
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,115,1306.1,115000,0.06,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-09-17_10-52-59
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 116
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.43679100672404
          entropy_coeff: 0.009999999999999998
          kl: 0.014152944386818293
          policy_loss: 0.07838431662983364
          total_loss: 0.05765621612469355
          vf_explained_var: -0.9456408023834229
          vf_loss: 0.00045539490626348805
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,116,1316.93,116000,0.06,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-09-17_10-53-09
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 117
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.286124246650272
          entropy_coeff: 0.009999999999999998
          kl: 0.013638040306789436
          policy_loss: -0.03452004517118136
          total_loss: -0.03868125304579735
          vf_explained_var: -0.538605272769928
          vf_loss: 0.015631472340869045
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,117,1327.27,117000,0.06,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-09-17_10-53-21
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 118
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.5500989225175648
          entropy_coeff: 0.009999999999999998
          kl: 0.01781948772492298
          policy_loss: -0.052633858016795584
          total_loss: -0.07323646197716395
          vf_explained_var: -0.5294155478477478
          vf_loss: 0.0008890019137955582
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,118,1339.5,118000,0.06,2,-7,996.16


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-09-17_10-53-34
  done: false
  episode_len_mean: 996.16
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 119
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.600300563706292
          entropy_coeff: 0.009999999999999998
          kl: 0.013747416490008697
          policy_loss: -0.032491764922936754
          total_loss: -0.05479192706859774
          vf_explained_var: -0.7009096741676331
          vf_loss: 0.0006096741616299066
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,119,1351.96,119000,0.06,2,-7,996.16




Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-09-17_10-54-03
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 120
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2697140746646456
          entropy_coeff: 0.009999999999999998
          kl: 0.025011073133136483
          policy_loss: 0.01547949943277571
          total_loss: -0.0005160260531637404
          vf_explained_var: -0.07809901237487793
          vf_loss: 0.0010741208896635928
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,120,1380.89,120000,0.06,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-09-17_10-54-13
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.07
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 121
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.344082082642449
          entropy_coeff: 0.009999999999999998
          kl: 0.015302747945573852
          policy_loss: -0.08017009405626191
          total_loss: -0.09768477976322174
          vf_explained_var: -0.30596673488616943
          vf_loss: 0.0007614588453887134
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,121,1391.47,121000,0.07,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-09-17_10-54-24
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.07
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 122
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2481180906295775
          entropy_coeff: 0.009999999999999998
          kl: 0.013489565098883311
          policy_loss: -0.0032812623100148307
          total_loss: -0.020197499502036306
          vf_explained_var: 0.14110177755355835
          vf_loss: 0.0010122129722731188
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,122,1401.84,122000,0.07,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-09-17_10-54-34
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.07
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 123
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2217010670238073
          entropy_coeff: 0.009999999999999998
          kl: 0.013572207300106266
          policy_loss: -0.0742927467243539
          total_loss: -0.09133234131667349
          vf_explained_var: -0.3547532260417938
          vf_loss: 0.0005968008525087498
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,123,1412.32,123000,0.07,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-09-17_10-54-44
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.07
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 124
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5222746239768132
          entropy_coeff: 0.009999999999999998
          kl: 0.010562803464457697
          policy_loss: -0.02325462336755461
          total_loss: -0.044445717562403945
          vf_explained_var: -0.8579029440879822
          vf_loss: 0.00046670508034165123
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,124,1422.47,124000,0.07,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-09-17_10-54-54
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.07
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 125
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6938922127087912
          entropy_coeff: 0.009999999999999998
          kl: 0.016572979299734315
          policy_loss: 0.035469124217828114
          total_loss: 0.024911759462621478
          vf_explained_var: -0.07798673212528229
          vf_loss: 0.0007881746297546973
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,125,1432.35,125000,0.07,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-09-17_10-55-04
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.07
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 126
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6369938876893784
          entropy_coeff: 0.009999999999999998
          kl: 0.015411741396828778
          policy_loss: -0.025435788184404375
          total_loss: -0.04578760183519787
          vf_explained_var: -0.5933629274368286
          vf_loss: 0.0008166565158818332
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,126,1442.3,126000,0.07,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-09-17_10-55-15
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 127
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3133685933219064
          entropy_coeff: 0.009999999999999998
          kl: 0.01090908901768193
          policy_loss: -0.023605333185858196
          total_loss: -0.04132970207267338
          vf_explained_var: -0.9801677465438843
          vf_loss: 0.001727500298552008
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,127,1452.53,127000,0.06,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-09-17_10-55-25
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 128
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2348410447438556
          entropy_coeff: 0.009999999999999998
          kl: 0.0128302073715723
          policy_loss: -0.046074498775932525
          total_loss: -0.06353434328403738
          vf_explained_var: -0.8125898838043213
          vf_loss: 0.0005583699965629623
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,128,1462.55,128000,0.06,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-09-17_10-55-34
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 129
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0613919681972925
          entropy_coeff: 0.009999999999999998
          kl: 0.019513855808949435
          policy_loss: -0.06278310467799504
          total_loss: -0.0761680081486702
          vf_explained_var: -0.5198114514350891
          vf_loss: 0.0006430921128614702
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,129,1472.21,129000,0.06,2,-7,994.79


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-09-17_10-55-44
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 130
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.489465437995063
          entropy_coeff: 0.009999999999999998
          kl: 0.014209094037633966
          policy_loss: -0.03858789313170645
          total_loss: -0.05829011524717013
          vf_explained_var: -0.28310060501098633
          vf_loss: 0.0003968606813941733
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,130,1481.73,130000,0.06,2,-7,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-09-17_10-55-54
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.07
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 131
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.888838373290168
          entropy_coeff: 0.009999999999999998
          kl: 0.01682069334351771
          policy_loss: -0.05746524574028121
          total_loss: -0.07031246829364035
          vf_explained_var: -0.7357526421546936
          vf_loss: 0.0003641757375185585
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,131,1491.81,131000,0.07,2,-7,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-09-17_10-56-04
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.07
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 132
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0405317889319528
          entropy_coeff: 0.009999999999999998
          kl: 0.01480096338162728
          policy_loss: -0.050550043562220204
          total_loss: -0.06548143968813949
          vf_explained_var: -0.3912461996078491
          vf_loss: 0.0004785990340880946
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,132,1501.86,132000,0.07,2,-7,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-09-17_10-56-14
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 133
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.829920188585917
          entropy_coeff: 0.009999999999999998
          kl: 0.016601967410808077
          policy_loss: -0.03594658474127452
          total_loss: -0.04817776812447442
          vf_explained_var: 0.12375698983669281
          vf_loss: 0.00046485298096538626
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,133,1511.7,133000,0.06,2,-7,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-09-17_10-56-24
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.04
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 134
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.068759735425313
          entropy_coeff: 0.009999999999999998
          kl: 0.01641457246149369
          policy_loss: -0.02765360607041253
          total_loss: -0.04249037297235595
          vf_explained_var: 0.11865590512752533
          vf_loss: 0.0003109106653331158
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,134,1521.77,134000,0.04,2,-7,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-09-17_10-56-34
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.07
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 135
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2414318958918256
          entropy_coeff: 0.009999999999999998
          kl: 0.009710176915969834
          policy_loss: -0.04991905219439003
          total_loss: -0.06860196058534913
          vf_explained_var: -0.7741437554359436
          vf_loss: 0.0004542246163408789
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,135,1531.69,135000,0.07,2,-7,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-09-17_10-56-43
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.11
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 136
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.398867705133226
          entropy_coeff: 0.009999999999999998
          kl: 0.013174507352642726
          policy_loss: -0.06262925134764777
          total_loss: -0.0816917480693923
          vf_explained_var: 0.05246450752019882
          vf_loss: 0.0004797785493994727
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,136,1540.83,136000,0.11,2,-7,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-09-17_10-56-53
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.13
  episode_reward_min: -7.0
  episodes_this_iter: 1
  episodes_total: 137
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.298553677399953
          entropy_coeff: 0.009999999999999998
          kl: 0.017489685626858473
          policy_loss: 0.07495224542087979
          total_loss: 0.06706938876046074
          vf_explained_var: -0.08836576342582703
          vf_loss: 0.009199907712334405
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,137,1550.32,137000,0.13,2,-7,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-09-17_10-57-03
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.2
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 138
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1039955152405634
          entropy_coeff: 0.009999999999999998
          kl: 0.014108709537878333
          policy_loss: 0.016607618497477637
          total_loss: 0.0013821358895964092
          vf_explained_var: -0.10786023736000061
          vf_loss: 0.0010527835389237024
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,138,1560.57,138000,0.2,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-09-17_10-57-13
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.21
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 139
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.083566235171424
          entropy_coeff: 0.009999999999999998
          kl: 0.010569022448810432
          policy_loss: 0.019055424071848393
          total_loss: 0.002337370150619083
          vf_explained_var: 0.07382503151893616
          vf_loss: 0.0005505620003936606
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,139,1570.81,139000,0.21,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-09-17_10-57-24
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.21
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 140
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8129965133137174
          entropy_coeff: 0.009999999999999998
          kl: 0.013237739064524452
          policy_loss: -0.0036094206074873607
          total_loss: -0.016013983223173354
          vf_explained_var: -0.09127794206142426
          vf_loss: 0.0012576691288914946
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,140,1581.28,140000,0.21,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-09-17_10-57-34
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.19
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 141
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6570304128858777
          entropy_coeff: 0.009999999999999998
          kl: 0.013309458051152252
          policy_loss: -0.024400141131546763
          total_loss: -0.0357762336730957
          vf_explained_var: 0.015339531004428864
          vf_loss: 0.0007022670575275293
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,141,1591.65,141000,0.19,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-09-17_10-57-44
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.18
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 142
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3810144861539204
          entropy_coeff: 0.009999999999999998
          kl: 0.020284860064447387
          policy_loss: 0.033172275208764604
          total_loss: 0.026862609800365235
          vf_explained_var: -0.07156277447938919
          vf_loss: 0.0006543386388026799
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,142,1601.9,142000,0.18,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-09-17_10-57-55
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.2
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 143
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3257682204246521
          entropy_coeff: 0.009999999999999998
          kl: 0.005324946486998172
          policy_loss: 0.02867644735508495
          total_loss: 0.018478968056539695
          vf_explained_var: -0.2976551055908203
          vf_loss: 0.00036444734242751213
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,143,1612.2,143000,0.2,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-09-17_10-58-05
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.2
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 144
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.991287773185306
          entropy_coeff: 0.009999999999999998
          kl: 0.0163963911244025
          policy_loss: -0.02240326851606369
          total_loss: -0.033395697962906624
          vf_explained_var: -0.5788459777832031
          vf_loss: 0.000619775222407447
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 144
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,144,1622.16,144000,0.2,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-09-17_10-58-15
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.18
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 145
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.788921554883321
          entropy_coeff: 0.009999999999999998
          kl: 0.009131930837718648
          policy_loss: 0.025283765296141306
          total_loss: 0.012777993745274014
          vf_explained_var: -0.303287148475647
          vf_loss: 0.0007604042220110488
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_since_restore: 145
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,145,1632.6,145000,0.18,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-09-17_10-58-25
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.18
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 146
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8608945356474982
          entropy_coeff: 0.009999999999999998
          kl: 0.011170088970861988
          policy_loss: -0.016022323071956633
          total_loss: -0.028513415820068784
          vf_explained_var: -0.8107267618179321
          vf_loss: 0.0004629975854994781
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_since_restore: 146
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,146,1642.83,146000,0.18,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-09-17_10-58-36
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.18
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 147
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.4823205577002632
          entropy_coeff: 0.009999999999999998
          kl: 0.005416533537035504
          policy_loss: -0.05575202920784553
          total_loss: -0.06747174207121134
          vf_explained_var: 0.2180517017841339
          vf_loss: 0.0003613716885107957
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_since_restore: 147
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,147,1653.15,147000,0.18,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-09-17_10-58-46
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.16
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 148
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3175871756341722
          entropy_coeff: 0.009999999999999998
          kl: 0.006169156112746382
          policy_loss: 0.109191133081913
          total_loss: 0.09943055361509323
          vf_explained_var: 0.015456883236765862
          vf_loss: 0.00029215708127594553
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 148
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,148,1663.8,148000,0.16,2,-2,995.97


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-09-17_10-58-57
  done: false
  episode_len_mean: 995.97
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.16
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 149
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3634496370951334
          entropy_coeff: 0.009999999999999998
          kl: 0.01159765072204111
          policy_loss: 0.04816485246022542
          total_loss: 0.040671227541234756
          vf_explained_var: -0.32158103585243225
          vf_loss: 0.0002695625781295045
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_since_restore: 149
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,149,1674.25,149000,0.16,2,-2,995.97




Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-09-17_10-59-26
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.15
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 150
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7038347442944846
          entropy_coeff: 0.009999999999999998
          kl: 0.009312254210533558
          policy_loss: -0.01893878397014406
          total_loss: -0.03068759371009138
          vf_explained_var: 0.08207765966653824
          vf_loss: 0.0005752068801989986
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 150
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,150,1703.59,150000,0.15,2,-2,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-09-17_10-59-37
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.17
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 151
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6779071066114637
          entropy_coeff: 0.009999999999999998
          kl: 0.007144074596756174
          policy_loss: 0.05993024052845107
          total_loss: 0.04753451278019283
          vf_explained_var: -0.0720881000161171
          vf_loss: 0.0007666540322437261
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_since_restore: 151
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,151,1714.08,151000,0.17,2,-1,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-09-17_10-59-47
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.15
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 152
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.4990594744682313
          entropy_coeff: 0.009999999999999998
          kl: 0.010871369019471663
          policy_loss: -0.0012543980239166153
          total_loss: -0.007318904892437987
          vf_explained_var: 0.06412074714899063
          vf_loss: 0.0034224564140054605
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 152
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,152,1724.43,152000,0.15,2,-1,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-09-17_10-59-57
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.15
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 153
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1729356951183743
          entropy_coeff: 0.009999999999999998
          kl: 0.009326090601867499
          policy_loss: 0.02371125751071506
          total_loss: 0.007033009496000078
          vf_explained_var: 0.043826114386320114
          vf_loss: 0.00032977803827331326
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_since_restore: 153
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,153,1733.88,153000,0.15,2,-1,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-09-17_11-00-06
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.13
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 154
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5769059658050537
          entropy_coeff: 0.009999999999999998
          kl: 0.010634295618187945
          policy_loss: -0.015039087169700198
          total_loss: -0.035015146599875556
          vf_explained_var: 0.38117820024490356
          vf_loss: 0.00040938705239770774
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_since_restore: 154
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,154,1743.16,154000,0.13,2,-1,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-09-17_11-00-16
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.14
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 155
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0970745272106592
          entropy_coeff: 0.009999999999999998
          kl: 0.007698713253763145
          policy_loss: -0.11239708862784836
          total_loss: -0.1286976926235689
          vf_explained_var: -0.10479889810085297
          vf_loss: 0.0007726661388915091
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_since_restore: 155
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,155,1753.01,155000,0.14,2,-1,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-09-17_11-00-26
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.15
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 156
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.670454474290212
          entropy_coeff: 0.009999999999999998
          kl: 0.012771876912785135
          policy_loss: -0.03391663179629379
          total_loss: -0.04312423446940051
          vf_explained_var: -0.01618622988462448
          vf_loss: 0.0010311803890443924
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_since_restore: 156
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,156,1763.27,156000,0.15,2,-1,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-09-17_11-00-36
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.13
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 157
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3833217951986525
          entropy_coeff: 0.009999999999999998
          kl: 0.009533461153151753
          policy_loss: 0.11251545051733652
          total_loss: 0.10450327814453178
          vf_explained_var: -0.01666773110628128
          vf_loss: 0.0009947300133515253
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations_since_restore: 157
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,157,1773.68,157000,0.13,2,-1,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-09-17_11-00-47
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.13
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 158
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6384264296955533
          entropy_coeff: 0.009999999999999998
          kl: 0.010433981701038616
          policy_loss: 0.01134790347682105
          total_loss: 0.0009731046441528533
          vf_explained_var: -0.7176402807235718
          vf_loss: 0.000727260601221739
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations_since_restore: 158
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,158,1784,158000,0.13,2,-1,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-09-17_11-00-57
  done: false
  episode_len_mean: 994.6
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.11
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 159
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.4106283254093595
          entropy_coeff: 0.009999999999999998
          kl: 0.012918988993766018
          policy_loss: 0.005773615588744481
          total_loss: -0.0013661497582991918
          vf_explained_var: -0.621155858039856
          vf_loss: 0.0004262766802437707
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_since_restore: 159
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,159,1794.37,159000,0.11,2,-1,994.6


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-09-17_11-01-07
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.12
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 160
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9137677457597522
          entropy_coeff: 0.009999999999999998
          kl: 0.012291503713535176
          policy_loss: -0.09112357778681648
          total_loss: -0.10165562646256553
          vf_explained_var: -0.4813947081565857
          vf_loss: 0.002383052331182019
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 160
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,160,1804.3,160000,0.12,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-09-17_11-01-17
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.12
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 161
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.4310067839092677
          entropy_coeff: 0.009999999999999998
          kl: 0.01565998047539887
          policy_loss: -0.017453827129469978
          total_loss: -0.03240731341971292
          vf_explained_var: 0.13516399264335632
          vf_loss: 0.0014287205598925033
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_since_restore: 161
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,161,1813.83,161000,0.12,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-09-17_11-01-26
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.1
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 162
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.190664225154453
          entropy_coeff: 0.009999999999999998
          kl: 0.010824558451883999
          policy_loss: -0.12057226912842857
          total_loss: -0.13651702536476984
          vf_explained_var: -0.2148207277059555
          vf_loss: 0.0004819512129971473
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_since_restore: 162
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,162,1823.58,162000,0.1,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-09-17_11-01-37
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.1
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 163
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8918573604689704
          entropy_coeff: 0.009999999999999998
          kl: 0.01175949684140442
          policy_loss: -0.0891940256787671
          total_loss: -0.10015471730795171
          vf_explained_var: -0.35300472378730774
          vf_loss: 0.0020046352952097853
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_since_restore: 163
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,163,1833.88,163000,0.1,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-09-17_11-01-47
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.09
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 164
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1704626970820957
          entropy_coeff: 0.009999999999999998
          kl: 0.013939547826684464
          policy_loss: -0.0519303867386447
          total_loss: -0.06556367029746374
          vf_explained_var: -0.40505117177963257
          vf_loss: 0.0010144463740289211
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_since_restore: 164
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,164,1844.21,164000,0.09,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-09-17_11-01-58
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.08
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 165
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.178907411628299
          entropy_coeff: 0.009999999999999998
          kl: 0.011625942576643143
          policy_loss: 0.032379552721977234
          total_loss: 0.01699422746896744
          vf_explained_var: -0.704806923866272
          vf_loss: 0.000518113606732287
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterations_since_restore: 165
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,165,1854.57,165000,0.08,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-09-17_11-02-08
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.08
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 166
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2224747472339206
          entropy_coeff: 0.009999999999999998
          kl: 0.01317578535290621
          policy_loss: -0.001635990043481191
          total_loss: -0.016316442812482516
          vf_explained_var: -0.9812367558479309
          vf_loss: 0.0008740512401952098
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 166
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,166,1864.88,166000,0.08,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-09-17_11-02-18
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.06
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 167
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.335572028160095
          entropy_coeff: 0.009999999999999998
          kl: 0.015070919458633201
          policy_loss: 0.03638131335998575
          total_loss: 0.021366948241160977
          vf_explained_var: -0.6098153591156006
          vf_loss: 0.0007117013825336471
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations_since_restore: 167
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,167,1875.15,167000,0.06,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-09-17_11-02-28
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.05
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 168
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.3011095682779947
          entropy_coeff: 0.009999999999999998
          kl: 0.012728784881423814
          policy_loss: -0.05787673791249593
          total_loss: -0.07370088555746608
          vf_explained_var: -0.29079124331474304
          vf_loss: 0.0007429998332453478
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 168
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,168,1885.43,168000,0.05,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-09-17_11-02-39
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.03
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 169
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.274161137474908
          entropy_coeff: 0.009999999999999998
          kl: 0.012800890601594507
          policy_loss: 0.006575019988748762
          total_loss: -0.007906599673959944
          vf_explained_var: -0.41175511479377747
          vf_loss: 0.0017795432397785286
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations_since_restore: 169
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,169,1895.96,169000,0.03,2,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-09-17_11-02-49
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 170
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2840764893425836
          entropy_coeff: 0.009999999999999998
          kl: 0.010646689515057477
          policy_loss: -0.029184678776396647
          total_loss: -0.04553200455589427
          vf_explained_var: -0.3276887536048889
          vf_loss: 0.001103551326216095
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 170
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,170,1906.38,170000,0.01,1,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-09-17_11-03-00
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 171
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.38516137070126
          entropy_coeff: 0.009999999999999998
          kl: 0.011094676014775474
          policy_loss: -0.08371319671471913
          total_loss: -0.10148370448085997
          vf_explained_var: -0.6827777624130249
          vf_loss: 0.0004644266007946701
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_since_restore: 171
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,171,1916.7,171000,0.01,1,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-09-17_11-03-10
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 172
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.182523663838704
          entropy_coeff: 0.009999999999999998
          kl: 0.01571033091619191
          policy_loss: -0.07426611404452059
          total_loss: -0.08720737256937557
          vf_explained_var: 0.06249856948852539
          vf_loss: 0.0009306250545907662
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 172
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,172,1926.75,172000,0.01,1,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-09-17_11-03-20
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.02
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 173
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.224958732393053
          entropy_coeff: 0.009999999999999998
          kl: 0.010371352499100803
          policy_loss: -0.11046441404355897
          total_loss: -0.12078305847114987
          vf_explained_var: 0.5977604985237122
          vf_loss: 0.0066804433642472655
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterations_since_restore: 173
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,173,1937.31,173000,0.02,1,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-09-17_11-03-31
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 174
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.282733792728848
          entropy_coeff: 0.009999999999999998
          kl: 0.017467400343491334
          policy_loss: -0.11323611707323128
          total_loss: -0.125669711165958
          vf_explained_var: -0.27573341131210327
          vf_loss: 0.0015508715056865993
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_since_restore: 174
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,174,1948.28,174000,0.01,1,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-09-17_11-03-43
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 175
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.3679453796810574
          entropy_coeff: 0.009999999999999998
          kl: 0.012870424175674897
          policy_loss: -0.06831992051253716
          total_loss: -0.06857431355553369
          vf_explained_var: -0.3477434515953064
          vf_loss: 0.01690940961578033
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterations_since_restore: 175
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,175,1959.56,175000,0.01,1,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-09-17_11-03-53
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 176
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2835155619515315
          entropy_coeff: 0.009999999999999998
          kl: 0.013064539191614037
          policy_loss: 0.0297146574076679
          total_loss: 0.014081344629327456
          vf_explained_var: -0.16117353737354279
          vf_loss: 0.0005879176376361606
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 176
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,176,1969.83,176000,0.01,1,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-09-17_11-04-04
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 177
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.227232805887858
          entropy_coeff: 0.009999999999999998
          kl: 0.011936475693456532
          policy_loss: -0.025215549446228478
          total_loss: -0.040716562968575294
          vf_explained_var: -0.14029870927333832
          vf_loss: 0.0007284702222225153
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iterations_since_restore: 177
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,177,1980.34,177000,0.01,1,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-09-17_11-04-14
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 178
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1923691670099896
          entropy_coeff: 0.009999999999999998
          kl: 0.011254348768065527
          policy_loss: -0.010531577467918395
          total_loss: -0.026149318284458583
          vf_explained_var: -0.40956616401672363
          vf_loss: 0.0006084373831981793
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations_since_restore: 178
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,178,1990.38,178000,0.01,1,0,995.93


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-09-17_11-04-24
  done: false
  episode_len_mean: 995.93
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 179
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.180376594596439
          entropy_coeff: 0.009999999999999998
          kl: 0.0156926360711396
          policy_loss: -0.13126194303234417
          total_loss: -0.14087089598178865
          vf_explained_var: -0.8716051578521729
          vf_loss: 0.00425041631808401
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iterations_since_restore: 179
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,179,2000.46,179000,0.01,1,0,995.93




Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-09-17_11-04-53
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 180
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1687488238016766
          entropy_coeff: 0.009999999999999998
          kl: 0.011667559376757808
          policy_loss: -0.020664643889500036
          total_loss: -0.03556946383582221
          vf_explained_var: -0.17514975368976593
          vf_loss: 0.0008759678661590442
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 180
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,180,2029.48,180000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-09-17_11-05-04
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 181
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.309056300587124
          entropy_coeff: 0.009999999999999998
          kl: 0.01201441664914621
          policy_loss: -0.0564750418273939
          total_loss: -0.07263309222956499
          vf_explained_var: -0.24261228740215302
          vf_loss: 0.000850212015858334
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 181
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,181,2040.16,181000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-09-17_11-05-14
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 182
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.225244069099426
          entropy_coeff: 0.009999999999999998
          kl: 0.01680151791036187
          policy_loss: -0.1111113323105706
          total_loss: -0.12377512339088652
          vf_explained_var: -0.10492980480194092
          vf_loss: 0.0010828832436042526
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_since_restore: 182
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,182,2050.29,182000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-09-17_11-05-24
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 183
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.265346254242791
          entropy_coeff: 0.009999999999999998
          kl: 0.011703601084769252
          policy_loss: -0.040201109668446915
          total_loss: -0.05631825142643518
          vf_explained_var: -0.7376633286476135
          vf_loss: 0.000611370137873261
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iterations_since_restore: 183
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,183,2060.69,183000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-09-17_11-05-34
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 184
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.21157714260949
          entropy_coeff: 0.009999999999999998
          kl: 0.012727961205346134
          policy_loss: -0.10105713274743822
          total_loss: -0.11610578108165
          vf_explained_var: -0.34089672565460205
          vf_loss: 0.000623591627421168
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 184
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,184,2070.97,184000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-09-17_11-05-44
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 185
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.174842890103658
          entropy_coeff: 0.009999999999999998
          kl: 0.016109971798889372
          policy_loss: -0.10752680874947045
          total_loss: -0.12055228675405184
          vf_explained_var: -0.8872271180152893
          vf_loss: 0.0005672761510747174
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iterations_since_restore: 185
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,185,2081.06,185000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-09-17_11-05-55
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 186
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2400975200865005
          entropy_coeff: 0.009999999999999998
          kl: 0.01211819023545478
          policy_loss: -0.055414970964193344
          total_loss: -0.07126228403713968
          vf_explained_var: -0.8475503921508789
          vf_loss: 0.0004188264197889819
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_since_restore: 186
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,186,2091.31,186000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-09-17_11-06-05
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 187
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.228593791855706
          entropy_coeff: 0.009999999999999998
          kl: 0.012164522411974271
          policy_loss: -0.09036179184913636
          total_loss: -0.10591664765444067
          vf_explained_var: -0.7796304225921631
          vf_loss: 0.0005727878351333654
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_since_restore: 187
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,187,2101.35,187000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-09-17_11-06-15
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 188
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.217365324497223
          entropy_coeff: 0.009999999999999998
          kl: 0.012953798971556334
          policy_loss: -0.023773056993054018
          total_loss: -0.033955124786330594
          vf_explained_var: -0.955640435218811
          vf_loss: 0.005433721238902459
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterations_since_restore: 188
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,188,2111.69,188000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-09-17_11-06-25
  done: false
  episode_len_mean: 994.57
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 189
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.267400532298618
          entropy_coeff: 0.009999999999999998
          kl: 0.01370651001896124
          policy_loss: -0.04136600012166632
          total_loss: -0.05643868429793252
          vf_explained_var: -1.0
          vf_loss: 0.0006624003523029387
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterations_since_restore: 189
  node_ip: 192.168.1.100
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,189,2121.65,189000,0.01,1,0,994.57


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-09-17_11-06-36
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 190
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8242662787437438
          entropy_coeff: 0.009999999999999998
          kl: 0.013770649587550447
          policy_loss: -0.08026177552011278
          total_loss: -0.08465497112936443
          vf_explained_var: -0.5383815765380859
          vf_loss: 0.006878074133419431
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 190
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,190,2132.08,190000,0.01,1,0,995.9


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-09-17_11-06-46
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 191
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0665960788726805
          entropy_coeff: 0.009999999999999998
          kl: 0.010598838887983872
          policy_loss: -0.052793215851609905
          total_loss: -0.06752159482695991
          vf_explained_var: -0.6836352348327637
          vf_loss: 0.0005719186663756975
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  iterations_since_restore: 191
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,191,2142.25,191000,0.01,1,0,995.9


Result for PPO_my_env_52b7f_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-09-17_11-06-56
  done: false
  episode_len_mean: 995.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 192
  experiment_id: 47168954e393400b9c4f2abc75550571
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1529735565185546
          entropy_coeff: 0.009999999999999998
          kl: 0.012683575204726181
          policy_loss: -0.027272874825737543
          total_loss: -0.041187533599117566
          vf_explained_var: -0.4902389645576477
          vf_loss: 0.0011940196690071994
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 192
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_52b7f_00000,RUNNING,192.168.1.100:1260,192,2152.57,192000,0.01,1,0,995.9
