In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional feature extractor for 3-channel image batches.

    The network is five ``Conv2d`` + ``ReLU`` stages (no padding) followed by
    ``Flatten``. The last convolution has 512 output channels, so the
    flattened output has 512 features per sample whenever the spatial size
    collapses to 1x1 (this is the case for 64x64 inputs).
    """

    def __init__(self):
        super().__init__()

        # One row per convolution stage: (in_channels, out_channels, kernel, stride).
        conv_specs = (
            (3, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
            (64, 64, 3, 1),
            (64, 512, 2, 1),
        )

        stages = []
        for in_ch, out_ch, kernel, stride in conv_specs:
            stages.append(
                nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride, padding=0)
            )
            stages.append(nn.ReLU())
        stages.append(nn.Flatten())

        # The attribute name `cnn` and the layer order determine the
        # state_dict keys (cnn.0.*, cnn.2.*, ...), so both must stay as they are.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Run the convolution stack on ``x`` and return the flattened features."""
        features = self.cnn(x)
        return features

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: frozen pretrained ``VisualEncoder`` + trainable MLP heads.

    Image observations are passed through the encoder under ``torch.no_grad()``
    (so no gradients reach the encoder), then through a three-layer ELU MLP.
    Two linear heads on top of the MLP produce the action logits and the
    state-value estimate.

    Args:
        obs_space: Observation space, forwarded to ``TorchModelV2``.
        action_space: Action space; ``action_space.n`` sets the size of the
            action head, so a space exposing ``.n`` is required.
        num_outputs: Forwarded to ``TorchModelV2``; the action head size is
            taken from ``action_space.n``, not from this value.
        model_config: Model config dict, forwarded to ``TorchModelV2``.
        name: Model name, forwarded to ``TorchModelV2``.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        # Width of the encoder output consumed by the first MLP layer.
        features_dim = 512
        self.encoder = VisualEncoder()
        # Weights are loaded onto the CPU first; device placement happens below.
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )

        mlp_feature_dim = 128
        self.mlp = nn.Sequential(
            nn.Linear(features_dim, mlp_feature_dim),
            nn.ELU(),
            nn.Linear(mlp_feature_dim, mlp_feature_dim),
            nn.ELU(),
            nn.Linear(mlp_feature_dim, mlp_feature_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(mlp_feature_dim, action_space.n)
        self.value_head = nn.Linear(mlp_feature_dim, 1)
        # Value estimate cached by forward() and returned by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.mlp.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: Dict with an ``'obs'`` tensor. The permute(0, 3, 1, 2)
                and the division by 255 assume a channels-last image batch
                with values in 0..255 -- TODO confirm against the env wrapper.
            state: Passed through unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(action_logits, state)``.
        """
        obs = input_dict['obs'].permute(0, 3, 1, 2).float() / 255.0
        # Tensor.cuda() is not in-place: the original `obs.cuda()` discarded its
        # result and left `obs` where it was. Move the batch to the device the
        # encoder weights actually live on so both always agree.
        obs = obs.to(next(self.encoder.parameters()).device)

        # The encoder is kept frozen: no graph is built through it.
        with torch.no_grad():
            features = self.encoder(obs)

        features = self.mlp(features)
        action = self.action_head(features)
        # (batch, 1) -> (batch,)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the most recent ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Make MyModelClass available to RLlib under the name "my_torch_model";
# a trainer config selects it through its model "custom_model" entry.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
# Restrict this process to CUDA device 0, with devices numbered in PCI bus order.
# NOTE(review): these variables presumably have to be set before CUDA is first
# initialised in the process to take effect -- confirm the cell order guarantees it.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

def env_creator(env_config):
    """Build the wrapped IGLU environment used for training.

    The environment is created with ``gym.make('IGLUSilentBuilder-v0', ...)``,
    given a task set, and then wrapped with ``PovOnlyWrapper`` and
    ``IgluActionWrapper`` (in that order).

    Args:
        env_config: Optional mapping of settings. Recognised keys:
            ``"max_steps"`` (default ``1000``) and ``"task_preset"``
            (default ``['C17']``). ``None`` or an empty mapping reproduces
            the previous hard-coded behaviour.

    Returns:
        The wrapped environment instance.
    """
    # Previously `env_config` was ignored and both values were hard-coded;
    # the defaults below keep that behaviour when nothing is configured.
    settings = env_config or {}
    max_steps = settings.get("max_steps", 1000)
    task_preset = settings.get("task_preset", ['C17'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = IgluActionWrapper(env)
    return env

from ray.tune.registry import register_env
# Register env_creator under the name "my_env" so a trainer config can refer
# to the environment by that string.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Model section of the trainer config.
model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# Weights & Biases run metadata consumed by WandbLogger.
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO C17 pretrained and frozen (AnnaCNN), MLP added (128)",
    "notes": "camera noop removed from actions",
}

# Full PPO trainer configuration.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# NOTE(review): no `stop=` criterion is passed, so this presumably trains
# until the cell is interrupted manually -- confirm that is intended.
analysis = tune.run(
    PPOTrainer,
    config=ppo_config,
    loggers=[WandbLogger],
)

2021-09-27 09:45:26,222	INFO wandb.py:170 -- Already logged into W&B.
2021-09-27 09:45:26,238	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_a44fc_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)


[2m[36m(pid=457)[0m 2021-09-27 09:45:30,315	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=457)[0m 2021-09-27 09:45:30,315	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-09-27_09-46-36
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.805732337633769
          entropy_coeff: 0.009999999999999998
          kl: 0.010329088540200829
          policy_loss: 0.04055147626333767
          total_loss: 0.015616235468122694
          vf_explained_var: 0.22965596616268158
          vf_loss: 0.0010562666378165079
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,1,59.5478,1000,0,0,0,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-09-27_09-46-48
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8035645617379084
          entropy_coeff: 0.009999999999999998
          kl: 0.013295291665596388
          policy_loss: 0.04517248993118604
          total_loss: 0.020256566173500485
          vf_explained_var: 0.374893456697464
          vf_loss: 0.00046066646367156255
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,2,71.051,2000,0,0,0,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-09-27_09-46-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.0
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8037554873360526
          entropy_coeff: 0.009999999999999998
          kl: 0.010338222757646814
          policy_loss: -0.05599654912948608
          total_loss: -0.06599820686711205
          vf_explained_var: -0.4692392647266388
          vf_loss: 0.01596825069670255
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,3,82.1755,3000,-1,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-09-27_09-47-10
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.75
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 4
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7964626815583973
          entropy_coeff: 0.009999999999999998
          kl: 0.010901198233542731
          policy_loss: 0.03405804791384273
          total_loss: 0.008928658937414487
          vf_explained_var: 0.1497691422700882
          vf_loss: 0.0006549998982033382
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,4,92.9289,4000,-0.75,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-09-27_09-47-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 5
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7887028376261394
          entropy_coeff: 0.009999999999999998
          kl: 0.012098019660501065
          policy_loss: -0.030468663490480847
          total_loss: -0.05558622462881936
          vf_explained_var: -0.0752684697508812
          vf_loss: 0.00034986095399492317
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,5,104.253,5000,-0.6,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-09-27_09-47-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.77143173482683
          entropy_coeff: 0.009999999999999998
          kl: 0.012219150867285405
          policy_loss: 0.06433627787563535
          total_loss: 0.03963676417867343
          vf_explained_var: -0.4600197672843933
          vf_loss: 0.0005709710395118843
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,6,115.707,6000,-0.5,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-09-27_09-47-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.42857142857142855
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 7
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7427472008599176
          entropy_coeff: 0.009999999999999998
          kl: 0.009640759892151456
          policy_loss: -0.04384344079428249
          total_loss: -0.06899384649263487
          vf_explained_var: -0.739227294921875
          vf_loss: 0.0003489170508045289
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,7,126.438,7000,-0.428571,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-09-27_09-47-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.375
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 8
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.714538672235277
          entropy_coeff: 0.009999999999999998
          kl: 0.010679104646621848
          policy_loss: 0.024115999622477426
          total_loss: -0.0006353208588229286
          vf_explained_var: -0.5791343450546265
          vf_loss: 0.0002582480940165826
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,8,137.552,8000,-0.375,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-09-27_09-48-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 9
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.718460604879591
          entropy_coeff: 0.009999999999999998
          kl: 0.01222960937883593
          policy_loss: -0.03984375206960572
          total_loss: -0.06441102460440662
          vf_explained_var: -0.8434838056564331
          vf_loss: 0.00017141049271837498
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,9,148.234,9000,-0.333333,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-09-27_09-48-16
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 10
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.682468605041504
          entropy_coeff: 0.009999999999999998
          kl: 0.010493182629358168
          policy_loss: -0.03741591659684976
          total_loss: -0.06200905599527889
          vf_explained_var: -0.6192070245742798
          vf_loss: 0.0001329097007126418
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,10,159.207,10000,-0.3,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-09-27_09-48-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2727272727272727
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 11
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.681708264350891
          entropy_coeff: 0.009999999999999998
          kl: 0.01291343847153707
          policy_loss: 0.008813033087386025
          total_loss: -0.015226711829503378
          vf_explained_var: -0.40170586109161377
          vf_loss: 0.00019465037880258429
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,11,170.101,11000,-0.272727,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-09-27_09-48-38
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 12
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6555524932013617
          entropy_coeff: 0.009999999999999998
          kl: 0.012537563103542008
          policy_loss: -0.020764565136697558
          total_loss: -0.044588534409801164
          vf_explained_var: -0.57622230052948
          vf_loss: 0.00022404208042037984
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,12,181.055,12000,-0.25,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-09-27_09-48-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.23076923076923078
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 13
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.689887107743157
          entropy_coeff: 0.009999999999999998
          kl: 0.00840642058701544
          policy_loss: 0.07942151054739952
          total_loss: 0.054325954119364424
          vf_explained_var: -0.03170866519212723
          vf_loss: 0.00012202818033983931
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,13,191.925,13000,-0.230769,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-09-27_09-49-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21428571428571427
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 14
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.677726886007521
          entropy_coeff: 0.009999999999999998
          kl: 0.012382958547477932
          policy_loss: 0.08099757788909806
          total_loss: 0.05687437421745724
          vf_explained_var: -0.8712305426597595
          vf_loss: 0.00017747031419680247
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,14,202.651,14000,-0.214286,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-09-27_09-49-10
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 15
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6460800303353205
          entropy_coeff: 0.009999999999999998
          kl: 0.010132130489357847
          policy_loss: 0.06351352532704671
          total_loss: 0.03927651767929395
          vf_explained_var: -0.7520787715911865
          vf_loss: 0.0001973647232969395
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,15,213.151,15000,-0.2,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-09-27_09-49-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1875
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 16
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6588867876264786
          entropy_coeff: 0.009999999999999998
          kl: 0.01185849966711733
          policy_loss: 0.05703977818290393
          total_loss: 0.032991356154282885
          vf_explained_var: -0.8823341131210327
          vf_loss: 0.0001687442731458254
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,16,223.589,16000,-0.1875,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-09-27_09-49-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.17647058823529413
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 17
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6784042252434626
          entropy_coeff: 0.009999999999999998
          kl: 0.011205888207052055
          policy_loss: 0.042711418991287546
          total_loss: 0.018338595165146722
          vf_explained_var: -0.6857379674911499
          vf_loss: 0.00017004440746354197
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,17,234.402,17000,-0.176471,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-09-27_09-49-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16666666666666666
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 18
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.704558401637607
          entropy_coeff: 0.009999999999999998
          kl: 0.01229910693371786
          policy_loss: 0.06567061502072546
          total_loss: 0.04123717885878351
          vf_explained_var: -0.9646249413490295
          vf_loss: 0.00015232594353922952
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,18,245.461,18000,-0.166667,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-09-27_09-49-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15789473684210525
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 19
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6941337082121106
          entropy_coeff: 0.009999999999999998
          kl: 0.009575957764662756
          policy_loss: -0.035940580483939916
          total_loss: -0.060781961182753245
          vf_explained_var: -1.0
          vf_loss: 0.0001847643524039692
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,19,256.558,19000,-0.157895,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-09-27_09-50-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 20
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6732290347417194
          entropy_coeff: 0.009999999999999998
          kl: 0.013424420483415665
          policy_loss: 0.011729190829727384
          total_loss: -0.012116028202904595
          vf_explained_var: -0.8670452833175659
          vf_loss: 0.00020218941289284784
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,20,267.633,20000,-0.15,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-09-27_09-50-16
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14285714285714285
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 21
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.635540599293179
          entropy_coeff: 0.009999999999999998
          kl: 0.007592134645080743
          policy_loss: -0.01641912263714605
          total_loss: -0.04114143513143063
          vf_explained_var: -0.9792525172233582
          vf_loss: 0.00011466684817504656
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,21,278.749,21000,-0.142857,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-09-27_09-50-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13636363636363635
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 22
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6332542763815985
          entropy_coeff: 0.009999999999999998
          kl: 0.012893105649885585
          policy_loss: 0.05193514298233721
          total_loss: 0.028266762528154583
          vf_explained_var: -0.7702014446258545
          vf_loss: 8.554076507102258e-05
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,22,289.931,22000,-0.136364,0,-3,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-09-27_09-50-38
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5652173913043478
  episode_reward_min: -10.0
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6612953186035155
          entropy_coeff: 0.009999999999999998
          kl: 0.01282809208543719
          policy_loss: 0.06806365168756909
          total_loss: 0.327099633961916
          vf_explained_var: 0.18259534239768982
          vf_loss: 0.283083319498433
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,23,301.242,23000,-0.565217,0,-10,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-09-27_09-50-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.0416666666666667
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 24
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.626789771185981
          entropy_coeff: 0.009999999999999998
          kl: 0.01289673387667578
          policy_loss: -0.05931640391548475
          total_loss: 0.1053502360979716
          vf_explained_var: -0.06013857573270798
          vf_loss: 0.1883551905112755
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,24,312.141,24000,-1.04167,0,-12,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-09-27_09-51-01
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.0
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 25
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6125179290771485
          entropy_coeff: 0.009999999999999998
          kl: 0.011932781526772206
          policy_loss: 0.07222083885636595
          total_loss: 0.052778958901762964
          vf_explained_var: -0.4050467312335968
          vf_loss: 0.004296745892821086
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,25,323.351,25000,-1,0,-12,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-09-27_09-51-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.9615384615384616
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 26
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6309716860453287
          entropy_coeff: 0.009999999999999998
          kl: 0.00879371616945538
          policy_loss: -0.001782646444108751
          total_loss: -0.024680161310566797
          vf_explained_var: -0.3862658739089966
          vf_loss: 0.0016534580221761845
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,26,334.797,26000,-0.961538,0,-12,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-09-27_09-51-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.9259259259259259
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 27
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6059876441955567
          entropy_coeff: 0.009999999999999998
          kl: 0.011444530826733586
          policy_loss: 0.06297398010889689
          total_loss: 0.03997249808162451
          vf_explained_var: -0.006823228672146797
          vf_loss: 0.0007694867328003359
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,27,345.643,27000,-0.925926,0,-12,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-09-27_09-51-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8928571428571429
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.560286913977729
          entropy_coeff: 0.009999999999999998
          kl: 0.00855171924867418
          policy_loss: 0.03706726150380241
          total_loss: 0.014142593120535214
          vf_explained_var: -0.013870496302843094
          vf_loss: 0.000967857293936605
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,28,356.648,28000,-0.892857,0,-12,1000


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-09-27_09-51-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8620689655172413
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 29
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5998176760143705
          entropy_coeff: 0.009999999999999998
          kl: 0.010702356087869678
          policy_loss: -0.011283529922366142
          total_loss: -0.03259314489033487
          vf_explained_var: -0.464955598115921
          vf_loss: 0.0025480872647474623
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,29,368.059,29000,-0.862069,0,-12,1000




Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-09-27_09-52-14
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8333333333333334
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 30
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5421253628200953
          entropy_coeff: 0.009999999999999998
          kl: 0.012744015329957737
          policy_loss: 0.031650001472896996
          total_loss: 0.009462033046616448
          vf_explained_var: -0.2103954255580902
          vf_loss: 0.0006844784796056855
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,30,396.718,30000,-0.833333,0,-12,996.1


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-09-27_09-52-25
  done: false
  episode_len_mean: 996.2258064516129
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8064516129032258
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 31
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5372197839948867
          entropy_coeff: 0.009999999999999998
          kl: 0.010848983692036832
          policy_loss: -0.07751996401283476
          total_loss: -0.10026671174499724
          vf_explained_var: -0.8695618510246277
          vf_loss: 0.00045565398007359664
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,31,407.922,31000,-0.806452,0,-12,996.226


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-09-27_09-52-36
  done: false
  episode_len_mean: 996.34375
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.78125
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 32
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5658591667811077
          entropy_coeff: 0.009999999999999998
          kl: 0.009350220738246498
          policy_loss: 0.026071756415896947
          total_loss: 0.003117231527964274
          vf_explained_var: -0.7615180015563965
          vf_loss: 0.0008340235657265617
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,32,418.743,32000,-0.78125,0,-12,996.344


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-09-27_09-52-47
  done: false
  episode_len_mean: 996.4545454545455
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7575757575757576
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 33
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6012832429673938
          entropy_coeff: 0.009999999999999998
          kl: 0.010765904485337844
          policy_loss: 0.056229699154694876
          total_loss: 0.03291211985051632
          vf_explained_var: -0.25554513931274414
          vf_loss: 0.0005420734788963779
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,33,429.535,33000,-0.757576,0,-12,996.455


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-09-27_09-52-58
  done: false
  episode_len_mean: 996.5588235294117
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7352941176470589
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 34
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5600968493355647
          entropy_coeff: 0.009999999999999998
          kl: 0.009862764160171688
          policy_loss: 0.01375560752219624
          total_loss: -0.009270123951137067
          vf_explained_var: -0.6111779808998108
          vf_loss: 0.0006026825279049162
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,34,440.963,34000,-0.735294,0,-12,996.559


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-09-27_09-53-10
  done: false
  episode_len_mean: 996.6571428571428
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.7142857142857143
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 35
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5193190813064574
          entropy_coeff: 0.009999999999999998
          kl: 0.009229464555117714
          policy_loss: 0.08516740451256434
          total_loss: 0.06200485146707958
          vf_explained_var: -0.6238638162612915
          vf_loss: 0.00018474565553737598
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,35,452.108,35000,-0.714286,0,-12,996.657


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-09-27_09-53-21
  done: false
  episode_len_mean: 996.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6944444444444444
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 36
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.425547210375468
          entropy_coeff: 0.009999999999999998
          kl: 0.011397095534852974
          policy_loss: -0.046511819917294715
          total_loss: -0.06831131544378069
          vf_explained_var: -0.9791341423988342
          vf_loss: 0.0001765593396460948
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,36,463.724,36000,-0.694444,0,-12,996.75


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-09-27_09-53-33
  done: false
  episode_len_mean: 996.8378378378378
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6756756756756757
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 37
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4133993599149917
          entropy_coeff: 0.009999999999999998
          kl: 0.010674142281382018
          policy_loss: 0.06781306047406462
          total_loss: 0.045999745031197865
          vf_explained_var: -0.09480413794517517
          vf_loss: 0.00018584886044360852
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,37,475.475,37000,-0.675676,0,-12,996.838


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-09-27_09-53-44
  done: false
  episode_len_mean: 996.921052631579
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6578947368421053
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 38
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4197953833474055
          entropy_coeff: 0.009999999999999998
          kl: 0.010851279022951053
          policy_loss: -0.0388837653833131
          total_loss: -0.06065206453204155
          vf_explained_var: -0.8988217115402222
          vf_loss: 0.0002593981540283292
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,38,486.645,38000,-0.657895,0,-12,996.921


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-09-27_09-53-56
  done: false
  episode_len_mean: 997.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6410256410256411
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 39
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.335504333178202
          entropy_coeff: 0.009999999999999998
          kl: 0.007261495072691076
          policy_loss: 0.022592040648063024
          total_loss: 0.0008774920056263606
          vf_explained_var: -0.9510140419006348
          vf_loss: 0.00018819585950243184
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,39,498.03,39000,-0.641026,0,-12,997


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-09-27_09-54-07
  done: false
  episode_len_mean: 997.075
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.625
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 40
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.303238291210598
          entropy_coeff: 0.009999999999999998
          kl: 0.00634435874746901
          policy_loss: 0.11674577436513371
          total_loss: 0.09517655463682281
          vf_explained_var: -0.9069872498512268
          vf_loss: 0.00019429188056771334
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,40,509.12,40000,-0.625,0,-12,997.075


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-09-27_09-54-19
  done: false
  episode_len_mean: 997.1463414634146
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6097560975609756
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 41
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.462312939431932
          entropy_coeff: 0.009999999999999998
          kl: 0.008451272066230532
          policy_loss: 0.09995489037699169
          total_loss: 0.07707338051663505
          vf_explained_var: -0.06280287355184555
          vf_loss: 5.136595641993659e-05
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,41,521.045,41000,-0.609756,0,-12,997.146


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-09-27_09-54-31
  done: false
  episode_len_mean: 997.2142857142857
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5952380952380952
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 42
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3450508541531034
          entropy_coeff: 0.009999999999999998
          kl: 0.013110836210966889
          policy_loss: 0.1479820965892739
          total_loss: 0.1273753491954671
          vf_explained_var: -0.9861538410186768
          vf_loss: 0.00022159139827838064
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,42,533.569,42000,-0.595238,0,-12,997.214


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-09-27_09-54-43
  done: false
  episode_len_mean: 997.2790697674419
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5813953488372093
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 43
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3652758704291448
          entropy_coeff: 0.009999999999999998
          kl: 0.017469043071418434
          policy_loss: 0.04572704508900642
          total_loss: 0.025751821200052896
          vf_explained_var: -0.39358407258987427
          vf_loss: 0.00018372369862239187
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,43,544.952,43000,-0.581395,0,-12,997.279


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-09-27_09-54-54
  done: false
  episode_len_mean: 997.3409090909091
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5681818181818182
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 44
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.39381652408176
          entropy_coeff: 0.009999999999999998
          kl: 0.01197811427685773
          policy_loss: 0.04195513427257538
          total_loss: 0.020592757024698788
          vf_explained_var: -0.6965802907943726
          vf_loss: 0.00018016247461976793
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,44,556.372,44000,-0.568182,0,-12,997.341


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-09-27_09-55-05
  done: false
  episode_len_mean: 997.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5555555555555556
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 45
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.442754626274109
          entropy_coeff: 0.009999999999999998
          kl: 0.011006570879549598
          policy_loss: -0.15853114128112794
          total_loss: -0.18060511963235007
          vf_explained_var: -0.4502853751182556
          vf_loss: 0.00015225416734918125
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,45,567.521,45000,-0.555556,0,-12,997.4


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-09-27_09-55-16
  done: false
  episode_len_mean: 997.4565217391304
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5434782608695652
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 46
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4526505443784927
          entropy_coeff: 0.009999999999999998
          kl: 0.0104088920965168
          policy_loss: 0.029996430418557592
          total_loss: 0.007829383843474917
          vf_explained_var: 0.5549829006195068
          vf_loss: 0.00027768181842273204
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,46,578.476,46000,-0.543478,0,-12,997.457


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-09-27_09-55-27
  done: false
  episode_len_mean: 997.5106382978723
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5319148936170213
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 47
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4116566287146672
          entropy_coeff: 0.009999999999999998
          kl: 0.007567296366049366
          policy_loss: 0.007689807646804386
          total_loss: -0.01470093722972605
          vf_explained_var: -0.6077679395675659
          vf_loss: 0.00021236082710755807
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,47,589.494,47000,-0.531915,0,-12,997.511


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-09-27_09-55-38
  done: false
  episode_len_mean: 997.5625
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5208333333333334
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 48
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.423216618431939
          entropy_coeff: 0.009999999999999998
          kl: 0.01231654551090868
          policy_loss: -0.13129470373193422
          total_loss: -0.15300621746314896
          vf_explained_var: -0.9094326496124268
          vf_loss: 5.734250260300339e-05
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,48,600.329,48000,-0.520833,0,-12,997.562


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-09-27_09-55-49
  done: false
  episode_len_mean: 997.6122448979592
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5102040816326531
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 49
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.430307242605421
          entropy_coeff: 0.009999999999999998
          kl: 0.00496159334063673
          policy_loss: -0.0004397918780644735
          total_loss: -0.02363187554809782
          vf_explained_var: -0.4644320607185364
          vf_loss: 0.00011867137555883447
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,49,611.057,49000,-0.510204,0,-12,997.612


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-09-27_09-56-00
  done: false
  episode_len_mean: 997.66
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 50
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.446855311923557
          entropy_coeff: 0.009999999999999998
          kl: 0.009555488702940273
          policy_loss: 0.0405749106572734
          total_loss: 0.017156982297698655
          vf_explained_var: -0.6736918687820435
          vf_loss: 9.507641591173726e-05
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,50,621.931,50000,-0.5,0,-12,997.66


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-09-27_09-56-11
  done: false
  episode_len_mean: 997.7058823529412
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.49019607843137253
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 51
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.444496046172248
          entropy_coeff: 0.009999999999999998
          kl: 0.012641017251916884
          policy_loss: 0.011608008129729164
          total_loss: -0.011327644892864757
          vf_explained_var: -1.0
          vf_loss: 0.0002452062804271312
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,51,632.757,51000,-0.490196,0,-12,997.706


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-09-27_09-56-22
  done: false
  episode_len_mean: 997.75
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4807692307692308
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 52
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3554792351192897
          entropy_coeff: 0.009999999999999998
          kl: 0.01785307893249457
          policy_loss: -0.06105543532305294
          total_loss: -0.0819253938893477
          vf_explained_var: -0.1517515629529953
          vf_loss: 0.0008995237494269127
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,52,643.915,52000,-0.480769,0,-12,997.75


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-09-27_09-56-33
  done: false
  episode_len_mean: 997.7924528301887
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4716981132075472
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 53
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5008601559533012
          entropy_coeff: 0.009999999999999998
          kl: 0.011555833318225364
          policy_loss: -0.062133844362364875
          total_loss: -0.08595593141184912
          vf_explained_var: -0.6297535300254822
          vf_loss: 3.0931659325182815e-05
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,53,655.468,53000,-0.471698,0,-12,997.792


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-09-27_09-56-44
  done: false
  episode_len_mean: 997.8333333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.46296296296296297
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 54
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.437099239561293
          entropy_coeff: 0.009999999999999998
          kl: 0.010910973129386221
          policy_loss: -0.06450833943155077
          total_loss: -0.08678555265069007
          vf_explained_var: -0.4642900228500366
          vf_loss: 0.0010026760875512586
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,54,666.442,54000,-0.462963,0,-12,997.833


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-09-27_09-56-55
  done: false
  episode_len_mean: 997.8727272727273
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.45454545454545453
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 55
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4554296758439804
          entropy_coeff: 0.009999999999999998
          kl: 0.011565075720524436
          policy_loss: 0.10992081173592144
          total_loss: 0.08658607262704107
          vf_explained_var: -0.7791928648948669
          vf_loss: 6.304904309217818e-05
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,55,677.472,55000,-0.454545,0,-12,997.873


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-09-27_09-57-07
  done: false
  episode_len_mean: 997.9107142857143
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.44642857142857145
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4960766659842597
          entropy_coeff: 0.009999999999999998
          kl: 0.008763134070471029
          policy_loss: -0.09444023788803153
          total_loss: -0.1184612524178293
          vf_explained_var: -0.2177165150642395
          vf_loss: 6.34363065425229e-05
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,56,688.572,56000,-0.446429,0,-12,997.911


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-09-27_09-57-18
  done: false
  episode_len_mean: 997.9473684210526
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.43859649122807015
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 57
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4860143370098537
          entropy_coeff: 0.009999999999999998
          kl: 0.008532767298662078
          policy_loss: 0.08870763277841939
          total_loss: 0.06473080389615563
          vf_explained_var: -0.8163315653800964
          vf_loss: 3.0038851946301293e-05
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,57,699.463,57000,-0.438596,0,-12,997.947


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-09-27_09-57-29
  done: false
  episode_len_mean: 997.9827586206897
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.43103448275862066
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 58
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5146480056974623
          entropy_coeff: 0.009999999999999998
          kl: 0.011519127022995502
          policy_loss: 0.1503887211283048
          total_loss: 0.12643591024809414
          vf_explained_var: -0.7380331158638
          vf_loss: 4.1754657356957774e-05
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,58,710.598,58000,-0.431034,0,-12,997.983


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-09-27_09-57-40
  done: false
  episode_len_mean: 998.0169491525423
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.423728813559322
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 59
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.51912649207645
          entropy_coeff: 0.009999999999999998
          kl: 0.01738736821466611
          policy_loss: 0.25651738229725096
          total_loss: 0.23309455033805634
          vf_explained_var: -0.5351142287254333
          vf_loss: 2.9694063798362752e-05
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,59,722.143,59000,-0.423729,0,-12,998.017




Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-09-27_09-58-08
  done: false
  episode_len_mean: 995.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4166666666666667
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 60
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.480437080065409
          entropy_coeff: 0.009999999999999998
          kl: 0.013200492190084508
          policy_loss: -0.03543841656711366
          total_loss: -0.05854247560103734
          vf_explained_var: 0.1686076521873474
          vf_loss: 0.00038026271839852494
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,60,749.877,60000,-0.416667,0,-12,995.6


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-09-27_09-58-19
  done: false
  episode_len_mean: 995.672131147541
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4098360655737705
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3208749188317195
          entropy_coeff: 0.009999999999999998
          kl: 0.011709556786032721
          policy_loss: -0.01681977692577574
          total_loss: -0.038464849773380494
          vf_explained_var: -0.39424043893814087
          vf_loss: 0.0003927216927978508
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,61,760.858,61000,-0.409836,0,-12,995.672


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-09-27_09-58-30
  done: false
  episode_len_mean: 995.741935483871
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4032258064516129
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 62
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.479464544190301
          entropy_coeff: 0.009999999999999998
          kl: 0.011652095583870726
          policy_loss: -0.006349134465886487
          total_loss: -0.029935549034012687
          vf_explained_var: -0.4277023673057556
          vf_loss: 4.301853481997063e-05
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,62,771.791,62000,-0.403226,0,-12,995.742


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-09-27_09-58-41
  done: false
  episode_len_mean: 995.8095238095239
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3968253968253968
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 63
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3266677776972453
          entropy_coeff: 0.009999999999999998
          kl: 0.012239088318622806
          policy_loss: -0.09594119857582781
          total_loss: -0.11785151896377405
          vf_explained_var: -0.9996626973152161
          vf_loss: 0.00013244915803119916
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,63,782.735,63000,-0.396825,0,-12,995.81


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-09-27_09-58-52
  done: false
  episode_len_mean: 995.875
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.390625
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 64
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4324774636162654
          entropy_coeff: 0.009999999999999998
          kl: 0.00812802860384066
          policy_loss: -0.022891997711526023
          total_loss: -0.046236639552646216
          vf_explained_var: -0.25196540355682373
          vf_loss: 0.00016732867394845621
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,64,794.045,64000,-0.390625,0,-12,995.875


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-09-27_09-59-04
  done: false
  episode_len_mean: 995.9384615384615
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.38461538461538464
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 65
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.339511587884691
          entropy_coeff: 0.009999999999999998
          kl: 0.015580974991816104
          policy_loss: 0.014180869112412136
          total_loss: -0.007497233235173755
          vf_explained_var: 0.019973674789071083
          vf_loss: 0.000158916115227233
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,65,805.389,65000,-0.384615,0,-12,995.938


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-09-27_09-59-15
  done: false
  episode_len_mean: 996.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3787878787878788
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 66
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9590937693913777
          entropy_coeff: 0.009999999999999998
          kl: 0.011116876959542794
          policy_loss: -0.09436257142159674
          total_loss: -0.11263168222374387
          vf_explained_var: -0.778717041015625
          vf_loss: 0.00021013704552185825
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,66,816.646,66000,-0.378788,0,-12,996


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-09-27_09-59-26
  done: false
  episode_len_mean: 996.0597014925373
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.373134328358209
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 67
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0707662290996973
          entropy_coeff: 0.009999999999999998
          kl: 0.01720860961393666
          policy_loss: 0.04426366960008939
          total_loss: 0.025507788194550407
          vf_explained_var: -0.8826278448104858
          vf_loss: 0.00023091819991047184
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,67,828.013,67000,-0.373134,0,-12,996.06


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-09-27_09-59-38
  done: false
  episode_len_mean: 996.1176470588235
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.36764705882352944
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 68
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.255882183710734
          entropy_coeff: 0.009999999999999998
          kl: 0.014320747189460508
          policy_loss: -0.004952885458866755
          total_loss: -0.025895218716727363
          vf_explained_var: 0.010173287242650986
          vf_loss: 0.0001844172966634182
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,68,839.363,68000,-0.367647,0,-12,996.118


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-09-27_09-59-49
  done: false
  episode_len_mean: 996.1739130434783
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.36231884057971014
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 69
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2852277358373008
          entropy_coeff: 0.009999999999999998
          kl: 0.014434838022348585
          policy_loss: -0.027509624593787724
          total_loss: -0.048668758736716375
          vf_explained_var: 0.10451435297727585
          vf_loss: 0.0002496580206449532
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,69,850.5,69000,-0.362319,0,-12,996.174


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-09-27_10-00-00
  done: false
  episode_len_mean: 996.2285714285714
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.35714285714285715
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 70
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8632776379585265
          entropy_coeff: 0.009999999999999998
          kl: 0.01785937241368909
          policy_loss: -0.07228041895561749
          total_loss: -0.08876885943528678
          vf_explained_var: -0.46342891454696655
          vf_loss: 0.0003583956301251116
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,70,861.709,70000,-0.357143,0,-12,996.229


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-09-27_10-00-11
  done: false
  episode_len_mean: 996.2816901408451
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.352112676056338
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 71
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9351907425456576
          entropy_coeff: 0.009999999999999998
          kl: 0.009590987518400264
          policy_loss: 0.024472063634958532
          total_loss: 0.006205180142488744
          vf_explained_var: -0.906122088432312
          vf_loss: 0.00012592437932552357
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,71,873.024,71000,-0.352113,0,-12,996.282


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-09-27_10-00-23
  done: false
  episode_len_mean: 996.3333333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3472222222222222
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 72
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.966591571436988
          entropy_coeff: 0.009999999999999998
          kl: 0.012904804081262721
          policy_loss: -0.02659039224187533
          total_loss: -0.044868030357691976
          vf_explained_var: -1.0
          vf_loss: 9.779808367764215e-05
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_restore: 72
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,72,884.589,72000,-0.347222,0,-12,996.333


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-09-27_10-00-35
  done: false
  episode_len_mean: 996.3835616438356
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3424657534246575
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 73
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.994081864092085
          entropy_coeff: 0.009999999999999998
          kl: 0.009875687115463922
          policy_loss: 0.044892171894510585
          total_loss: 0.02602264554136329
          vf_explained_var: -1.0
          vf_loss: 8.372129769769445e-05
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_restore: 73
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,73,896.628,73000,-0.342466,0,-12,996.384


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-09-27_10-00-46
  done: false
  episode_len_mean: 996.4324324324324
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.33783783783783783
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 74
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9646952960226272
          entropy_coeff: 0.009999999999999998
          kl: 0.010049037633409528
          policy_loss: -0.014385247520274586
          total_loss: -0.03293280504229996
          vf_explained_var: -1.0
          vf_loss: 9.449231870854015e-05
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,74,907.996,74000,-0.337838,0,-12,996.432


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-09-27_10-00-58
  done: false
  episode_len_mean: 996.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 75
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.976941998799642
          entropy_coeff: 0.009999999999999998
          kl: 0.011440577891174936
          policy_loss: -0.026971268985006545
          total_loss: -0.04549882171882524
          vf_explained_var: -1.0
          vf_loss: 9.780802061464379e-05
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_restore: 75
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,75,919.219,75000,-0.333333,0,-12,996.48


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-09-27_10-01-09
  done: false
  episode_len_mean: 996.5263157894736
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.32894736842105265
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 76
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0939615779452856
          entropy_coeff: 0.009999999999999998
          kl: 0.013585545328337586
          policy_loss: 0.04455562076634831
          total_loss: 0.025076691351003117
          vf_explained_var: -1.0
          vf_loss: 0.00010213162232604291
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_restore: 76
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,76,930.324,76000,-0.328947,0,-12,996.526


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-09-27_10-01-20
  done: false
  episode_len_mean: 996.5714285714286
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3246753246753247
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 77
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4851537624994915
          entropy_coeff: 0.009999999999999998
          kl: 0.013092339605042917
          policy_loss: 0.10282069312201605
          total_loss: 0.07951652606328329
          vf_explained_var: -0.4075332283973694
          vf_loss: 0.00023813745259152104
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,77,941.619,77000,-0.324675,0,-12,996.571


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-09-27_10-01-31
  done: false
  episode_len_mean: 996.6153846153846
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.32051282051282054
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 78
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.154319405555725
          entropy_coeff: 0.009999999999999998
          kl: 0.013544718833497048
          policy_loss: -0.06992851009385453
          total_loss: -0.08994569844669766
          vf_explained_var: -0.7919886112213135
          vf_loss: 0.00017153082246497637
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,78,952.72,78000,-0.320513,0,-12,996.615


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-09-27_10-01-42
  done: false
  episode_len_mean: 996.6582278481013
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.31645569620253167
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 79
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0754912296930947
          entropy_coeff: 0.009999999999999998
          kl: 0.01210729897673125
          policy_loss: 0.0015200641627113023
          total_loss: -0.0178614120102591
          vf_explained_var: -1.0
          vf_loss: 0.00016270438354695215
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_restore: 79
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,79,963.879,79000,-0.316456,0,-12,996.658


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-09-27_10-01-54
  done: false
  episode_len_mean: 996.7
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3125
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 80
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1470501979192096
          entropy_coeff: 0.009999999999999998
          kl: 0.011002565900558252
          policy_loss: -0.033908497500750756
          total_loss: -0.054203688104947405
          vf_explained_var: -1.0
          vf_loss: 7.505360699724405e-05
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,80,975.072,80000,-0.3125,0,-12,996.7


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-09-27_10-02-05
  done: false
  episode_len_mean: 996.7407407407408
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.30864197530864196
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 81
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4441994455125595
          entropy_coeff: 0.009999999999999998
          kl: 0.013881351340386613
          policy_loss: -0.03801036609543695
          total_loss: -0.06093440494603581
          vf_explained_var: -0.6968477964401245
          vf_loss: 0.00012982220008173802
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,81,985.912,81000,-0.308642,0,-12,996.741


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-09-27_10-02-15
  done: false
  episode_len_mean: 996.780487804878
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3048780487804878
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 82
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.445192337036133
          entropy_coeff: 0.009999999999999998
          kl: 0.010098829825171051
          policy_loss: -0.04714887930701176
          total_loss: -0.07055884848038356
          vf_explained_var: -0.9317458868026733
          vf_loss: 3.2071062853194436e-05
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,82,996.694,82000,-0.304878,0,-12,996.78


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-09-27_10-02-26
  done: false
  episode_len_mean: 996.8192771084338
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.30120481927710846
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 83
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4451661242379084
          entropy_coeff: 0.009999999999999998
          kl: 0.01068461461128142
          policy_loss: -0.023661728410257234
          total_loss: -0.04695396356077658
          vf_explained_var: -1.0
          vf_loss: 9.096311504334962e-05
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore: 83
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,83,1007.63,83000,-0.301205,0,-12,996.819


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-09-27_10-02-37
  done: false
  episode_len_mean: 996.8571428571429
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2976190476190476
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 84
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5142599211798773
          entropy_coeff: 0.009999999999999998
          kl: 0.011256014261884733
          policy_loss: -0.06689576643208663
          total_loss: -0.09085810639792019
          vf_explained_var: -1.0
          vf_loss: 5.465846065817459e-05
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore: 84
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,84,1018.79,84000,-0.297619,0,-12,996.857


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-09-27_10-02-49
  done: false
  episode_len_mean: 996.8941176470588
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.29411764705882354
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 85
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.515455635388692
          entropy_coeff: 0.009999999999999998
          kl: 0.011675284378133198
          policy_loss: 0.0023936600734790164
          total_loss: -0.021519931240214243
          vf_explained_var: -0.9217342138290405
          vf_loss: 7.343765913295405e-05
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,85,1030.28,85000,-0.294118,0,-12,996.894


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-09-27_10-03-00
  done: false
  episode_len_mean: 996.9302325581396
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.29069767441860467
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 86
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5860971980624727
          entropy_coeff: 0.009999999999999998
          kl: 0.008264924803426589
          policy_loss: -0.08598445666333039
          total_loss: -0.11098221581843164
          vf_explained_var: -0.9505621790885925
          vf_loss: 3.672012190792076e-05
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,86,1041.71,86000,-0.290698,0,-12,996.93


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-09-27_10-03-12
  done: false
  episode_len_mean: 996.9655172413793
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.28735632183908044
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 87
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5881185399161444
          entropy_coeff: 0.009999999999999998
          kl: 0.010886381061822093
          policy_loss: -0.18314331707855067
          total_loss: -0.20787292338079877
          vf_explained_var: 0.1907006800174713
          vf_loss: 6.294369890787897e-05
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,87,1052.92,87000,-0.287356,0,-12,996.966


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-09-27_10-03-23
  done: false
  episode_len_mean: 997.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2840909090909091
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 88
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0860192404852973
          entropy_coeff: 0.009999999999999998
          kl: 0.015539483375167418
          policy_loss: -0.08680013169844945
          total_loss: -0.10587001680913899
          vf_explained_var: -0.5601653456687927
          vf_loss: 0.00023635876350454056
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,88,1064.47,88000,-0.284091,0,-12,997


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-09-27_10-03-35
  done: false
  episode_len_mean: 997.0337078651686
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2808988764044944
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 89
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3523712184694077
          entropy_coeff: 0.009999999999999998
          kl: 0.013333466181854912
          policy_loss: 0.010362384302748575
          total_loss: -0.01168900157014529
          vf_explained_var: 0.19320610165596008
          vf_loss: 0.00013897816946458383
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,89,1075.83,89000,-0.280899,0,-12,997.034




Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-09-27_10-04-02
  done: false
  episode_len_mean: 995.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2777777777777778
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 90
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4662816921869912
          entropy_coeff: 0.009999999999999998
          kl: 0.014618149111113067
          policy_loss: -0.06823080480098724
          total_loss: -0.09134889791409175
          vf_explained_var: 0.13050125539302826
          vf_loss: 8.290317738202349e-05
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 90
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,90,1103.18,90000,-0.277778,0,-12,995.4


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-09-27_10-04-14
  done: false
  episode_len_mean: 995.4505494505495
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.27472527472527475
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 91
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1958285437689886
          entropy_coeff: 0.009999999999999998
          kl: 0.014575347260994714
          policy_loss: 0.04505645682414373
          total_loss: 0.02469144579437044
          vf_explained_var: -0.9798324704170227
          vf_loss: 0.0001357409550918318
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,91,1115.19,91000,-0.274725,0,-12,995.451


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-09-27_10-04-25
  done: false
  episode_len_mean: 995.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2717391304347826
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 92
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3614769697189333
          entropy_coeff: 0.009999999999999998
          kl: 0.014678004714618175
          policy_loss: -0.041372941931088765
          total_loss: -0.06321064001984067
          vf_explained_var: 0.14604657888412476
          vf_loss: 0.00030927007553853197
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_since_restore: 92

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,92,1125.98,92000,-0.271739,0,-12,995.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-09-27_10-04-36
  done: false
  episode_len_mean: 995.5483870967741
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.26881720430107525
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 93
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5284757534662883
          entropy_coeff: 0.009999999999999998
          kl: 0.012323660227782751
          policy_loss: -0.024588466187318168
          total_loss: -0.04837231164177259
          vf_explained_var: -0.5373808741569519
          vf_loss: 0.0002685445061716665
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,93,1136.66,93000,-0.268817,0,-12,995.548


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-09-27_10-04-46
  done: false
  episode_len_mean: 995.5957446808511
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.26595744680851063
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 94
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5352438926696776
          entropy_coeff: 0.009999999999999998
          kl: 0.010031122698881036
          policy_loss: 0.020282725079192057
          total_loss: -0.003952838646041022
          vf_explained_var: -0.7528144717216492
          vf_loss: 0.00011376222028047778
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,94,1147.29,94000,-0.265957,0,-12,995.596


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-09-27_10-04-57
  done: false
  episode_len_mean: 995.6421052631579
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2631578947368421
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 95
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5749624013900756
          entropy_coeff: 0.009999999999999998
          kl: 0.01204502987278201
          policy_loss: -0.18602746840980317
          total_loss: -0.21048158076074389
          vf_explained_var: -0.37137654423713684
          vf_loss: 9.100819018688829e-05
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,95,1157.8,95000,-0.263158,0,-12,995.642


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-09-27_10-05-08
  done: false
  episode_len_mean: 995.6875
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2604166666666667
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 96
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4106970522138806
          entropy_coeff: 0.009999999999999998
          kl: 0.012784088820001309
          policy_loss: -0.05201982864075237
          total_loss: -0.07465417550669776
          vf_explained_var: -0.546211838722229
          vf_loss: 0.00019421665015720968
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,96,1168.54,96000,-0.260417,0,-12,995.688


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-09-27_10-05-18
  done: false
  episode_len_mean: 995.7319587628866
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25773195876288657
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 97
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4636710405349733
          entropy_coeff: 0.009999999999999998
          kl: 0.008966200105216012
          policy_loss: 0.026558161661442783
          total_loss: 0.0029882450277606645
          vf_explained_var: -0.7326455116271973
          vf_loss: 0.00017017332429531963
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,97,1179.09,97000,-0.257732,0,-12,995.732


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-09-27_10-05-29
  done: false
  episode_len_mean: 995.7755102040817
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25510204081632654
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 98
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4576113250520493
          entropy_coeff: 0.009999999999999998
          kl: 0.01505977275867563
          policy_loss: 0.003390168315834469
          total_loss: -0.01936850580904219
          vf_explained_var: -0.465974897146225
          vf_loss: 0.0003114600219608595
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,98,1189.63,98000,-0.255102,0,-12,995.776


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-09-27_10-05-39
  done: false
  episode_len_mean: 995.8181818181819
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25252525252525254
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 99
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.43654474152459
          entropy_coeff: 0.009999999999999998
          kl: 0.014700124339958789
          policy_loss: -0.03249659689350261
          total_loss: -0.05517716362244553
          vf_explained_var: -0.9641844034194946
          vf_loss: 0.00021486807777869722
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,99,1200.18,99000,-0.252525,0,-12,995.818


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-09-27_10-05-50
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 100
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.512313355339898
          entropy_coeff: 0.009999999999999998
          kl: 0.009706112236933276
          policy_loss: -0.09734418193499247
          total_loss: -0.12133587135208977
          vf_explained_var: -0.8946531414985657
          vf_loss: 0.00016083191981629676
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,100,1210.86,100000,-0.25,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-09-27_10-06-01
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 101
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.500488583246867
          entropy_coeff: 0.009999999999999998
          kl: 0.011554061339139299
          policy_loss: -0.07617067458728949
          total_loss: -0.09992490427361594
          vf_explained_var: -0.09894564747810364
          vf_loss: 9.524845206922489e-05
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,101,1221.44,101000,-0.25,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-09-27_10-06-11
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 102
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5123241980870565
          entropy_coeff: 0.009999999999999998
          kl: 0.01145668517560153
          policy_loss: -0.08064234219491481
          total_loss: -0.10453213651974996
          vf_explained_var: -0.18743263185024261
          vf_loss: 8.777698039921233e-05
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,102,1232.02,102000,-0.25,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-09-27_10-06-22
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 103
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.542198716269599
          entropy_coeff: 0.009999999999999998
          kl: 0.011692136675641684
          policy_loss: -0.08370613381266594
          total_loss: -0.10783207602798939
          vf_explained_var: -0.38231921195983887
          vf_loss: 0.00012683175957338083
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,103,1242.52,103000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-09-27_10-06-32
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 104
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4987660937839085
          entropy_coeff: 0.009999999999999998
          kl: 0.013131179469850086
          policy_loss: -0.06456632419592805
          total_loss: -0.08803767419109741
          vf_explained_var: -0.46517160534858704
          vf_loss: 0.00020319122547031535
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,104,1252.97,104000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-09-27_10-06-43
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 105
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4741125716103447
          entropy_coeff: 0.009999999999999998
          kl: 0.011676632973626531
          policy_loss: -0.06076458651158545
          total_loss: -0.08416639566421509
          vf_explained_var: -0.5485209822654724
          vf_loss: 0.00017165386889246293
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,105,1263.72,105000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-09-27_10-06-54
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 106
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5359444936116535
          entropy_coeff: 0.009999999999999998
          kl: 0.012554039221025744
          policy_loss: -0.05826359134581354
          total_loss: -0.08220881014648411
          vf_explained_var: -1.0
          vf_loss: 0.00015881840111736285
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,106,1274.58,106000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-09-27_10-07-05
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 107
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.412741658422682
          entropy_coeff: 0.009999999999999998
          kl: 0.010499231353629313
          policy_loss: 0.015522919512457318
          total_loss: -0.007456866568989224
          vf_explained_var: -1.0
          vf_loss: 9.77083774665112e-05
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,107,1285.39,107000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-09-27_10-07-15
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 108
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.316991596751743
          entropy_coeff: 0.009999999999999998
          kl: 0.01066190524365545
          policy_loss: -0.014652973123722607
          total_loss: -0.03655144208007389
          vf_explained_var: -0.33423781394958496
          vf_loss: 0.00020525490391365666
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,108,1296.03,108000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-09-27_10-07-26
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 109
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3871677849027844
          entropy_coeff: 0.009999999999999998
          kl: 0.009367949348666273
          policy_loss: -0.0905936581393083
          total_loss: -0.11334290264381303
          vf_explained_var: -0.5333758592605591
          vf_loss: 0.00018563646359931834
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,109,1306.8,109000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-09-27_10-07-37
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 110
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4652015580071343
          entropy_coeff: 0.009999999999999998
          kl: 0.009110927573655787
          policy_loss: -0.005454500123030609
          total_loss: -0.029091759812500742
          vf_explained_var: -0.3377518653869629
          vf_loss: 0.00010366387448609911
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,110,1317.36,110000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-09-27_10-07-48
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 111
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4393127547370064
          entropy_coeff: 0.009999999999999998
          kl: 0.010423565351376871
          policy_loss: -0.031075719330045912
          total_loss: -0.054172804123825496
          vf_explained_var: -0.7887703776359558
          vf_loss: 0.00025368434548403863
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,111,1328.41,111000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-09-27_10-07-59
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 112
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4308849228752982
          entropy_coeff: 0.009999999999999998
          kl: 0.0124228629374006
          policy_loss: 0.027183395396504138
          total_loss: 0.004371066091375219
          vf_explained_var: -0.6992344856262207
          vf_loss: 0.0002542324799125911
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,112,1339.22,112000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-09-27_10-08-09
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 113
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4488657659954494
          entropy_coeff: 0.009999999999999998
          kl: 0.010524038661440244
          policy_loss: -0.0005680052977469233
          total_loss: -0.023695984275804625
          vf_explained_var: -0.6912607550621033
          vf_loss: 0.0003082723378207043
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,113,1350.07,113000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-09-27_10-08-20
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 114
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.375048679775662
          entropy_coeff: 0.009999999999999998
          kl: 0.009759398024871052
          policy_loss: 0.01174226962029934
          total_loss: -0.010745615636308988
          vf_explained_var: -0.972567081451416
          vf_loss: 0.00028666068570196835
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,114,1360.82,114000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-09-27_10-08-31
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 115
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4308475547366672
          entropy_coeff: 0.009999999999999998
          kl: 0.01156606003930294
          policy_loss: 0.00013366316755612692
          total_loss: -0.02271705377433035
          vf_explained_var: -0.7011638879776001
          vf_loss: 0.0003011547160794079
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,115,1371.36,115000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-09-27_10-08-40
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 116
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.453119407759772
          entropy_coeff: 0.009999999999999998
          kl: 0.02203691001039147
          policy_loss: -0.26220006942749025
          total_loss: -0.28421656820509167
          vf_explained_var: 0.02681116759777069
          vf_loss: 0.00031100030690949
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,116,1380.99,116000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-09-27_10-08-51
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 117
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.459447087181939
          entropy_coeff: 0.009999999999999998
          kl: 0.013564407329556901
          policy_loss: -0.05766338308652242
          total_loss: -0.07980961269802517
          vf_explained_var: -0.2659725844860077
          vf_loss: 0.0004135823834480511
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,117,1391.96,117000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-09-27_10-09-03
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 118
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3399159537421332
          entropy_coeff: 0.009999999999999998
          kl: 0.010984441265901168
          policy_loss: -0.09490214006768333
          total_loss: -0.11642094014419449
          vf_explained_var: -0.21758867800235748
          vf_loss: 0.0002326896220135192
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,118,1403.14,118000,-0.22,0,-12,995.86


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-09-27_10-09-14
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 119
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.3349675840801662
          entropy_coeff: 0.009999999999999998
          kl: 0.013339080889397082
          policy_loss: 0.0661469676428371
          total_loss: 0.045084558779166804
          vf_explained_var: 0.19147713482379913
          vf_loss: 0.000286403959358318
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,119,1414.24,119000,-0.22,0,-12,995.86




Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-09-27_10-09-41
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 120
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.323055312368605
          entropy_coeff: 0.009999999999999998
          kl: 0.011627117998634917
          policy_loss: -0.016850759088993073
          total_loss: -0.038081653705901566
          vf_explained_var: 0.0623636469244957
          vf_loss: 0.0002555872740534445
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,120,1441.8,120000,-0.22,0,-12,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-09-27_10-09-54
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 121
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.270085334777832
          entropy_coeff: 0.009999999999999998
          kl: 0.009752515961991613
          policy_loss: -0.07205371757348379
          total_loss: -0.0930008331934611
          vf_explained_var: 0.003582184901461005
          vf_loss: 0.0002908626417694096
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,121,1454.25,121000,-0.22,0,-12,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-09-27_10-10-05
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.22
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 122
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2555505408181085
          entropy_coeff: 0.009999999999999998
          kl: 0.009595969256593959
          policy_loss: -0.021736368536949158
          total_loss: -0.042629948423968424
          vf_explained_var: -0.40540796518325806
          vf_loss: 0.00022252813966285127
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,122,1465.25,122000,-0.22,0,-12,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-09-27_10-10-16
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.12
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 123
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.247271778848436
          entropy_coeff: 0.009999999999999998
          kl: 0.008996578306628302
          policy_loss: 0.027954529432786836
          total_loss: 0.006962629449036386
          vf_explained_var: -0.6644667983055115
          vf_loss: 0.000131331504114011
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,123,1476.27,123000,-0.12,0,-12,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-09-27_10-10-27
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 124
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.270600414276123
          entropy_coeff: 0.009999999999999998
          kl: 0.010725383851992268
          policy_loss: -0.11296452801260683
          total_loss: -0.13395766309565968
          vf_explained_var: -0.6531471610069275
          vf_loss: 0.00010406360140930499
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,124,1487.63,124000,0,0,0,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-09-27_10-10-39
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 125
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2950886461469864
          entropy_coeff: 0.009999999999999998
          kl: 0.009341220624485608
          policy_loss: -0.030236192958222496
          total_loss: -0.05167242387930552
          vf_explained_var: -0.4263309836387634
          vf_loss: 0.00011346935643814504
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,125,1498.82,125000,0,0,0,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-09-27_10-10-50
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 126
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2260587983661226
          entropy_coeff: 0.009999999999999998
          kl: 0.01099889403702796
          policy_loss: -0.03831673661867777
          total_loss: -0.058837314446767174
          vf_explained_var: -0.44266512989997864
          vf_loss: 9.017281602912893e-05
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,126,1510.19,126000,0,0,0,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-09-27_10-11-01
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 127
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.1929963297314115
          entropy_coeff: 0.009999999999999998
          kl: 0.009330250123228892
          policy_loss: -0.024379999190568925
          total_loss: -0.04479401947723494
          vf_explained_var: -0.9590843915939331
          vf_loss: 0.00011640513942741866
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,127,1521.54,127000,0,0,0,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-09-27_10-11-13
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 128
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.2151292112138536
          entropy_coeff: 0.009999999999999998
          kl: 0.012395373805143963
          policy_loss: -0.054587363327542944
          total_loss: -0.07469122991379765
          vf_explained_var: -0.9204661250114441
          vf_loss: 0.00018812077077099174
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,128,1532.73,128000,0,0,0,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-09-27_10-11-24
  done: false
  episode_len_mean: 994.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 129
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.1980379237069023
          entropy_coeff: 0.009999999999999998
          kl: 0.010149024645018863
          policy_loss: -0.03005354909433259
          total_loss: -0.05038167279627588
          vf_explained_var: -0.4944753348827362
          vf_loss: 0.00012989847972575162
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,129,1543.98,129000,0,0,0,994.5


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-09-27_10-11-35
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 130
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.168603293100993
          entropy_coeff: 0.009999999999999998
          kl: 0.011139769462456832
          policy_loss: 0.010121205697456996
          total_loss: -0.009749749965137906
          vf_explained_var: -0.7690477967262268
          vf_loss: 0.00014411172439091993
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,130,1555.03,130000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-09-27_10-11-46
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 131
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.180068940586514
          entropy_coeff: 0.009999999999999998
          kl: 0.009920696732209904
          policy_loss: 0.007277895592980915
          total_loss: -0.012970662117004395
          vf_explained_var: -0.9949599504470825
          vf_loss: 6.402681489513877e-05
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,131,1566.31,131000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-09-27_10-11-58
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 132
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.1536112705866497
          entropy_coeff: 0.009999999999999998
          kl: 0.011125395037270221
          policy_loss: -0.049575287890103126
          total_loss: -0.0693201421863503
          vf_explained_var: -0.9962068200111389
          vf_loss: 0.00012244741985543644
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,132,1577.72,132000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-09-27_10-12-09
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 133
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.065454708205329
          entropy_coeff: 0.009999999999999998
          kl: 0.009798131735522159
          policy_loss: -0.03954739218784703
          total_loss: -0.05862853272507588
          vf_explained_var: -0.8764340877532959
          vf_loss: 0.0001036855329503952
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,133,1589.04,133000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-09-27_10-12-21
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 134
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.156803486082289
          entropy_coeff: 0.009999999999999998
          kl: 0.015034947357320935
          policy_loss: -0.018491632863879205
          total_loss: -0.03770326893362734
          vf_explained_var: -0.4988389015197754
          vf_loss: 0.00010115566049838284
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,134,1601.22,134000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-09-27_10-12-33
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 135
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.185128492779202
          entropy_coeff: 0.009999999999999998
          kl: 0.007821342538149503
          policy_loss: -0.09094371994336446
          total_loss: -0.11154021095070574
          vf_explained_var: -0.9347925782203674
          vf_loss: 8.159318056439386e-05
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,135,1613.39,135000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-09-27_10-12-45
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 136
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.1960924175050525
          entropy_coeff: 0.009999999999999998
          kl: 0.011506190059546522
          policy_loss: -0.0629605116115676
          total_loss: -0.08313107722335392
          vf_explained_var: -0.5047528147697449
          vf_loss: 6.443079340291054e-05
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,136,1625.38,136000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-09-27_10-12-57
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 137
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.179061847262912
          entropy_coeff: 0.009999999999999998
          kl: 0.00978318530491839
          policy_loss: -0.01571765591700872
          total_loss: -0.035933285703261694
          vf_explained_var: -0.4583781063556671
          vf_loss: 0.00010751232912298292
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,137,1637.07,137000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-09-27_10-13-08
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 138
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.128486484951443
          entropy_coeff: 0.009999999999999998
          kl: 0.009095514882196959
          policy_loss: 0.002421041371093856
          total_loss: -0.01728616551392608
          vf_explained_var: -0.4781325161457062
          vf_loss: 0.00021332968131496777
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,138,1648.47,138000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-09-27_10-13-20
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 139
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.1003341065512764
          entropy_coeff: 0.009999999999999998
          kl: 0.010835912028468937
          policy_loss: -0.03099995921883318
          total_loss: -0.05019443883664078
          vf_explained_var: -0.676210343837738
          vf_loss: 0.00018347512878891494
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,139,1659.82,139000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-09-27_10-13-32
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 140
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.1023738980293274
          entropy_coeff: 0.009999999999999998
          kl: 0.010601222695464221
          policy_loss: -0.07213304510547056
          total_loss: -0.09139892558256785
          vf_explained_var: -0.3365122377872467
          vf_loss: 0.0001676770193929163
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,140,1671.83,140000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-09-27_10-13-44
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 141
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.153299452198876
          entropy_coeff: 0.009999999999999998
          kl: 0.008987602084954242
          policy_loss: -0.09230061651517947
          total_loss: -0.1123015594151285
          vf_explained_var: -0.9773854613304138
          vf_loss: 0.00018390873930103328
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_ip: 192.168.1.100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,141,1683.9,141000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-09-27_10-13-55
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 142
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.103549999660916
          entropy_coeff: 0.009999999999999998
          kl: 0.009268246148892798
          policy_loss: -0.028026428570350013
          total_loss: -0.04754615492290921
          vf_explained_var: -0.8501831889152527
          vf_loss: 0.00012553793704783958
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  node_ip: 192.168.1.100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,142,1695.24,142000,0,0,0,995.67


Result for PPO_my_env_a44fc_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-09-27_10-14-07
  done: false
  episode_len_mean: 995.67
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 143
  experiment_id: 03be24429ec34374bfc1b78d8a5a014c
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.1419659548335606
          entropy_coeff: 0.009999999999999998
          kl: 0.008303343883149314
          policy_loss: -0.08544610804981656
          total_loss: -0.10540715588463677
          vf_explained_var: -0.555293619632721
          vf_loss: 0.00021311044470672237
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  node_ip: 192.168.1.100
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_a44fc_00000,RUNNING,192.168.1.100:457,143,1706.54,143000,0,0,0,995.67
