In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional feature extractor for 3-channel images.

    Five unpadded ``Conv2d`` + ``ReLU`` stages followed by ``Flatten``. Because no
    padding is used, the flattened feature size depends on the input resolution
    (a 64x64 input yields 512 features per sample).
    """

    # (in_channels, out_channels, kernel_size, stride) for each conv stage, in order.
    _CONV_SPECS = (
        (3, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 64, 3, 1),
        (64, 64, 3, 1),
        (64, 512, 2, 1),
    )

    def __init__(self):
        super().__init__()

        stages = []
        for in_ch, out_ch, kernel, step in self._CONV_SPECS:
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=step, padding=0))
            stages.append(nn.ReLU())
        stages.append(nn.Flatten())

        # The attribute name and the layer order fix the state_dict keys
        # ("cnn.0.weight", "cnn.2.weight", ...), so both must stay as they are
        # for previously saved weights to load.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Pass ``x`` through the conv stack and return the flattened features."""
        return self.cnn(x)

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: a pretrained ``VisualEncoder`` with linear policy and value heads.

    ``forward`` returns the action logits and caches the value estimate, which
    ``value_function`` then returns.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder, load its saved weights and create both heads.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2``.
            action_space: Action space; ``action_space.n`` sets the number of logits.
            num_outputs: Forwarded to ``TorchModelV2``.
            model_config: Forwarded to ``TorchModelV2``.
            name: Forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        # Must equal the encoder's flattened output size
        # (512 for 64x64 inputs -- TODO confirm the env's frame resolution).
        features_dim = 512
        self.encoder = VisualEncoder()
        # Weights are loaded onto the CPU first; device placement happens below.
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Value estimate from the most recent forward() call.
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits for a batch of observations.

        Args:
            input_dict: Mapping whose ``'obs'`` entry is a batched, channels-last
                image tensor; it is permuted to channels-first and divided by 255.
            state: Passed through unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(logits, state)``.
        """
        obs = input_dict['obs'].permute(0, 3, 1, 2).float() / 255.0
        # Tensor.cuda() returns a copy instead of moving the tensor in place, so the
        # result has to be assigned. Follow the device the parameters currently live
        # on rather than `use_cuda`, so this stays correct if the model is moved
        # after construction.
        obs = obs.to(next(self.parameters()).device)

        features = self.encoder(obs)
        action = self.action_head(features)
        # Cache the value estimate with its trailing size-1 dimension removed.
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the last ``forward`` call.

        Raises:
            AssertionError: If ``forward`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the name "my_torch_model"; the trainer config's
# "custom_model" entry refers to the model by this name.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os

# Select which GPU CUDA may use: device "0", with CUDA_DEVICE_ORDER set to PCI_BUS_ID.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",   # see issue #152
    CUDA_VISIBLE_DEVICES="0",
)

def env_creator(env_config):
    """Build the wrapped ``IGLUSilentBuilder-v0`` environment.

    Args:
        env_config: Optional mapping of overrides. Recognised keys:
            ``"max_steps"`` (default ``1000``), passed to ``gym.make``, and
            ``"task_preset"`` (default ``['C8']``), passed to ``TaskSet(preset=...)``.
            An empty or ``None`` config reproduces the previous hard-coded setup.

    Returns:
        The environment wrapped with ``PovOnlyWrapper``, ``SelectAndPlace`` and
        ``Discretization`` over the ``'human-level'`` flat action space.
    """
    settings = env_config or {}
    max_steps = settings.get("max_steps", 1000)
    task_preset = settings.get("task_preset", ['C8'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    # Wrapper order matters: each wrapper receives the env produced by the previous one.
    env = PovOnlyWrapper(env)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    return env

from ray.tune.registry import register_env
# Register env_creator under the name "my_env"; the trainer config's "env"
# entry refers to the environment by this name.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Weights & Biases project and run name, placed under logger_config["wandb"].
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO C8 pretrained (AnnaCNN) (3 noops after placement)",
}

# Model section: use the custom model registered as "my_torch_model",
# with no extra constructor kwargs.
model_settings = {
    "custom_model": "my_torch_model",
    "custom_model_config": {},
}

# Full PPO trainer configuration.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# No stop criterion is passed to tune.run here.
analysis = tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])



Trial name,status,loc
PPO_my_env_e6792_00000,PENDING,


2021-10-06 12:42:43,681	INFO wandb.py:170 -- Already logged into W&B.
2021-10-06 12:42:43,696	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=1133330)[0m 2021-10-06 12:42:47,899	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=1133330)[0m 2021-10-06 12:42:47,900	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=1133330)[0m 2021-10-06 12:42:56,190	INFO trainable.py:109 -- Trainable.setup took 11.280 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-06_12-44-23
  done: false
  episode_len_mean: 375.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.314123249053955
          entropy_coeff: 0.009999999999999998
          kl: 0.03125776325824559
          policy_loss: -0.07530977502465248
          total_loss: -0.02365747218330701
          vf_explained_var: 0.7709622979164124
          vf_loss: 0.0685419831217991
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,1,86.9648,1000,0,0,0,375


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-06_12-44-51
  done: false
  episode_len_mean: 361.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 5
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.322902117835151
          entropy_coeff: 0.009999999999999998
          kl: 0.012161669822374637
          policy_loss: -0.06756407370169958
          total_loss: -0.04765793109933535
          vf_explained_var: 0.6702104806900024
          vf_loss: 0.039486660559972124
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.1.100
  num_healthy_worker

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,2,114.912,2000,0,0,0,361


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-06_12-45-18
  done: false
  episode_len_mean: 350.375
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 8
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3784685161378647
          entropy_coeff: 0.009999999999999998
          kl: 0.012322832692780613
          policy_loss: 0.06370737404666013
          total_loss: 0.05670995715384682
          vf_explained_var: 0.8983887434005737
          vf_loss: 0.01309041828951902
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 192.168.1.100
  num_healthy_worker

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,3,142.492,3000,0,0,0,350.375


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-06_12-45-47
  done: false
  episode_len_mean: 346.6363636363636
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 11
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.362176521619161
          entropy_coeff: 0.009999999999999998
          kl: 0.013392603421455243
          policy_loss: 0.07080499331156413
          total_loss: 0.05847318255239063
          vf_explained_var: 0.8645395636558533
          vf_loss: 0.0072721756063401696
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.1.100
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,4,170.911,4000,0,0,0,346.636


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-06_12-46-15
  done: false
  episode_len_mean: 342.85714285714283
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 14
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.376375593079461
          entropy_coeff: 0.009999999999999998
          kl: 0.013742724192950738
          policy_loss: -0.014393066697650485
          total_loss: -0.03003637029065026
          vf_explained_var: 0.7836697697639465
          vf_loss: 0.0039976349721352255
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 192.168.1.100
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,5,199.31,5000,0,0,0,342.857


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-06_12-46-43
  done: false
  episode_len_mean: 339.11764705882354
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 17
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3854885869556
          entropy_coeff: 0.009999999999999998
          kl: 0.010513445181325033
          policy_loss: -0.02513536661863327
          total_loss: -0.043245673179626465
          vf_explained_var: 0.6319712400436401
          vf_loss: 0.0025905473519944482
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip: 192.168.1.100
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,6,226.721,6000,0,0,0,339.118


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-06_12-47-09
  done: false
  episode_len_mean: 337.65
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 20
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.365061291058858
          entropy_coeff: 0.009999999999999998
          kl: 0.011536900511820313
          policy_loss: -0.10392913694183031
          total_loss: -0.12249978979428609
          vf_explained_var: 0.2073507159948349
          vf_loss: 0.0016188948615712838
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_ip: 192.168.1.100
  num_healthy_wor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,7,253.276,7000,0,0,0,337.65


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-06_12-47-34
  done: false
  episode_len_mean: 336.8695652173913
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 23
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.313660012351142
          entropy_coeff: 0.009999999999999998
          kl: 0.010862536389515122
          policy_loss: -0.103098423861795
          total_loss: -0.12220511502689785
          vf_explained_var: 0.16595174372196198
          vf_loss: 0.0007711474683472059
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,8,278.527,8000,0,0,0,336.87


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-06_12-48-00
  done: false
  episode_len_mean: 335.3076923076923
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 26
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.290780848926968
          entropy_coeff: 0.009999999999999998
          kl: 0.01384619869331654
          policy_loss: -0.16786854084995056
          total_loss: -0.18587764700253803
          vf_explained_var: 0.03023543953895569
          vf_loss: 0.0007448479109573075
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip: 192.168.1.100
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,9,304.105,9000,0,0,0,335.308




Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-06_12-48-42
  done: false
  episode_len_mean: 331.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 30
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.236787184079488
          entropy_coeff: 0.009999999999999998
          kl: 0.008242130012501372
          policy_loss: 0.027933080535795955
          total_loss: 0.008397500072088506
          vf_explained_var: -0.060134727507829666
          vf_loss: 0.00035965178071314263
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 192.168.1.100
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,10,345.822,10000,0,0,0,331.9


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-06_12-49-13
  done: false
  episode_len_mean: 329.09090909090907
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 33
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.2810988638136123
          entropy_coeff: 0.009999999999999998
          kl: 0.008344771828471152
          policy_loss: 0.006251493013567395
          total_loss: -0.013795589448677169
          vf_explained_var: -0.5249354839324951
          vf_loss: 0.00026047463882908535
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,11,376.622,11000,0,0,0,329.091


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-06_12-49-39
  done: false
  episode_len_mean: 328.3888888888889
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 36
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.285874507162306
          entropy_coeff: 0.009999999999999998
          kl: 0.006900220482851359
          policy_loss: 0.00032045001991921
          total_loss: -0.020019838224268623
          vf_explained_var: -0.698535144329071
          vf_loss: 0.00044839028970373974
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,12,402.825,12000,0,0,0,328.389


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-06_12-50-04
  done: false
  episode_len_mean: 327.7692307692308
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 39
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3127404001024034
          entropy_coeff: 0.009999999999999998
          kl: 0.005966940454645926
          policy_loss: -0.04952314417395327
          total_loss: -0.07060728278011083
          vf_explained_var: -0.5271923542022705
          vf_loss: 0.0002531814921591528
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,13,428.037,13000,0,0,0,327.769


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-06_12-50-30
  done: false
  episode_len_mean: 327.7142857142857
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 42
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3214550760057238
          entropy_coeff: 0.009999999999999998
          kl: 0.0076867318772030225
          policy_loss: 0.0028622201540403898
          total_loss: -0.017711424910359912
          vf_explained_var: -0.9944319725036621
          vf_loss: 0.00033488523768028245
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,14,454.204,14000,0,0,0,327.714


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-06_12-50-56
  done: false
  episode_len_mean: 327.4888888888889
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 45
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.2007294495900473
          entropy_coeff: 0.009999999999999998
          kl: 0.007217968058247701
          policy_loss: -0.005507272626790735
          total_loss: -0.025074481964111328
          vf_explained_var: -0.8617287278175354
          vf_loss: 0.0002746953347620244
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,15,480.258,15000,0,0,0,327.489


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-06_12-51-22
  done: false
  episode_len_mean: 327.3125
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 48
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.2730541202757095
          entropy_coeff: 0.009999999999999998
          kl: 0.0073819474559066916
          policy_loss: -0.02271429600401057
          total_loss: -0.04283455676502652
          vf_explained_var: -0.9602513909339905
          vf_loss: 0.000395694850804931
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,16,506.212,16000,0,0,0,327.312


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-06_12-51-48
  done: false
  episode_len_mean: 327.1372549019608
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 51
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.321487906244066
          entropy_coeff: 0.009999999999999998
          kl: 0.010456893157812905
          policy_loss: 0.010167422476742002
          total_loss: -0.009641550357143085
          vf_explained_var: -0.8112577795982361
          vf_loss: 0.00026883827449637466
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore: 17
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,17,531.915,17000,0,0,0,327.137


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-06_12-52-14
  done: false
  episode_len_mean: 326.94545454545454
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 55
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.310197530852424
          entropy_coeff: 0.009999999999999998
          kl: 0.007837101945060059
          policy_loss: 0.011503797935114966
          total_loss: -0.008990224243866073
          vf_explained_var: -0.9371430277824402
          vf_loss: 0.0002568220794071547
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,18,557.707,18000,0,0,0,326.945


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-06_12-52-40
  done: false
  episode_len_mean: 326.8448275862069
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 58
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3142656167348226
          entropy_coeff: 0.009999999999999998
          kl: 0.01044895249009969
          policy_loss: -0.03088379053192006
          total_loss: -0.05052395709272888
          vf_explained_var: -0.7606180906295776
          vf_loss: 0.0003678045248913501
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,19,583.459,19000,0,0,0,326.845




Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-06_12-53-24
  done: false
  episode_len_mean: 325.655737704918
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 61
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.322466278076172
          entropy_coeff: 0.009999999999999998
          kl: 0.00837210314297252
          policy_loss: -0.011901268218126562
          total_loss: -0.032379786525335574
          vf_explained_var: -0.5687780380249023
          vf_loss: 0.00023451319749357127
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,20,628.235,20000,0,0,0,325.656


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-06_12-53-50
  done: false
  episode_len_mean: 325.125
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 64
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3498921553293863
          entropy_coeff: 0.009999999999999998
          kl: 0.0067338021432290455
          policy_loss: 0.004905929954515563
          total_loss: -0.016370230354368687
          vf_explained_var: -0.625443160533905
          vf_loss: 0.0002026210407267273
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,21,654.261,21000,0,0,0,325.125


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-06_12-54-16
  done: false
  episode_len_mean: 326.0149253731343
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 67
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.348033645417955
          entropy_coeff: 0.009999999999999998
          kl: 0.005441397772202588
          policy_loss: -0.0060876026956571475
          total_loss: -0.026999319758680133
          vf_explained_var: -0.9221914410591125
          vf_loss: 0.0009362005099925833
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,22,679.782,22000,0,0,0,326.015


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-06_12-54-41
  done: false
  episode_len_mean: 326.4714285714286
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 70
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.2689546505610148
          entropy_coeff: 0.009999999999999998
          kl: 0.008626306763759119
          policy_loss: -0.04618263269464175
          total_loss: -0.06573387278864781
          vf_explained_var: -0.8899770379066467
          vf_loss: 0.0005504127910222047
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,23,705.088,23000,0,0,0,326.471


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-06_12-55-06
  done: false
  episode_len_mean: 327.6027397260274
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 73
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.4056336773766414
          entropy_coeff: 0.009999999999999998
          kl: 0.005864289224910212
          policy_loss: -0.029971211900313696
          total_loss: -0.051674305357866816
          vf_explained_var: -0.5788211822509766
          vf_loss: 0.000593957603086892
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,24,729.821,24000,0,0,0,327.603


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-06_12-55-31
  done: false
  episode_len_mean: 327.9736842105263
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 76
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.389535975456238
          entropy_coeff: 0.009999999999999998
          kl: 0.014476585233006862
          policy_loss: -0.04547526128590107
          total_loss: -0.06468831093774902
          vf_explained_var: -0.8937914371490479
          vf_loss: 0.00033933703064879713
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,25,754.556,25000,0,0,0,327.974


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-06_12-55-56
  done: false
  episode_len_mean: 328.27848101265823
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 79
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.408798329035441
          entropy_coeff: 0.009999999999999998
          kl: 0.006921102380545029
          policy_loss: -0.021830982776979604
          total_loss: -0.042934997379779814
          vf_explained_var: -0.5182417631149292
          vf_loss: 0.0009076392904616013
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26
  node_ip: 192.168.1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,26,779.41,26000,0,0,0,328.278


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-06_12-56-20
  done: false
  episode_len_mean: 328.890243902439
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 82
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3735316620932685
          entropy_coeff: 0.009999999999999998
          kl: 0.009872801160536572
          policy_loss: -0.023671327489945625
          total_loss: -0.04370854198932648
          vf_explained_var: -0.9058646559715271
          vf_loss: 0.000736261936455978
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,27,803.924,27000,0,0,0,328.89


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-06_12-56-44
  done: false
  episode_len_mean: 329.95238095238096
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 84
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.208626813358731
          entropy_coeff: 0.009999999999999998
          kl: 0.010744365376582958
          policy_loss: -0.047010169240335624
          total_loss: -0.06448256656941441
          vf_explained_var: -0.9744651913642883
          vf_loss: 0.0013905608179306404
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  node_ip: 192.168.1.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,28,827.999,28000,0,0,0,329.952


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-06_12-57-10
  done: false
  episode_len_mean: 329.8965517241379
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 87
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.253613519668579
          entropy_coeff: 0.009999999999999998
          kl: 0.010521932501262734
          policy_loss: -0.07505845816598998
          total_loss: -0.09386778647700945
          vf_explained_var: -0.8164140582084656
          vf_loss: 0.0005702261560751747
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29
  node_ip: 192.168.1.100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,29,853.26,29000,0,0,0,329.897




Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-06_12-57-49
  done: false
  episode_len_mean: 330.24444444444447
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 90
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3061464495129056
          entropy_coeff: 0.009999999999999998
          kl: 0.010082798817364635
          policy_loss: -0.06554363473421998
          total_loss: -0.08511059929927191
          vf_explained_var: -1.0
          vf_loss: 0.00046965819120588193
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_ip: 192.168.1.100
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,30,892.681,30000,0,0,0,330.244


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-06_12-58-17
  done: false
  episode_len_mean: 330.68817204301075
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 93
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.35583078066508
          entropy_coeff: 0.009999999999999998
          kl: 0.007577602962546671
          policy_loss: 0.016388566460874346
          total_loss: -0.004228497710492876
          vf_explained_var: -1.0
          vf_loss: 0.0006679625733037634
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore: 31
  node_ip: 192.168.1.100
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,31,920.107,31000,0,0,0,330.688


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-06_12-58-40
  done: false
  episode_len_mean: 331.625
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 96
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.407492240269979
          entropy_coeff: 0.009999999999999998
          kl: 0.009487561775224702
          policy_loss: -0.030074406953321562
          total_loss: -0.05063056796789169
          vf_explained_var: -0.9856142401695251
          vf_loss: 0.0006724917564295336
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
  node_ip: 192.168.1.100
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,32,943.233,32000,0,0,0,331.625


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-06_12-59-03
  done: false
  episode_len_mean: 332.67676767676767
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 99
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.403721496793959
          entropy_coeff: 0.009999999999999998
          kl: 0.005912229759308948
          policy_loss: -0.06254274518125587
          total_loss: -0.08431535512208939
          vf_explained_var: -0.9985917806625366
          vf_loss: 0.0004909348277174609
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_restore: 33
  node_ip: 192.168.1.10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,33,966.616,33000,0,0,0,332.677


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-06_12-59-28
  done: false
  episode_len_mean: 332.03
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 102
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.2780280960930717
          entropy_coeff: 0.009999999999999998
          kl: 0.008803403149943417
          policy_loss: -0.05441572086678611
          total_loss: -0.07405948518878884
          vf_explained_var: -0.9877132177352905
          vf_loss: 0.000495494605274871
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 34
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,34,991.819,34000,0,0,0,332.03


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-06_12-59-53
  done: false
  episode_len_mean: 331.91
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 105
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.2034784065352544
          entropy_coeff: 0.009999999999999998
          kl: 0.007870286726643859
          policy_loss: -0.04719396945503023
          total_loss: -0.06634017324282063
          vf_explained_var: -0.936732292175293
          vf_loss: 0.0005274936066901622
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_restore: 35
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,35,1016.57,35000,0,0,0,331.91


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-06_13-00-20
  done: false
  episode_len_mean: 331.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 107
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.097370070881314
          entropy_coeff: 0.009999999999999998
          kl: 0.009876248191967956
          policy_loss: -0.06713449996378687
          total_loss: -0.08469131348861589
          vf_explained_var: -0.8037344813346863
          vf_loss: 0.00045401279397386436
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36
  node_ip: 192.168.1.100
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,36,1042.93,36000,0,0,0,331.86


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-06_13-00-45
  done: false
  episode_len_mean: 331.54
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 110
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.147047514385647
          entropy_coeff: 0.009999999999999998
          kl: 0.007426045684688552
          policy_loss: -0.03157592833869987
          total_loss: -0.050217043567034936
          vf_explained_var: -0.8911110758781433
          vf_loss: 0.0006015459078803865
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 37
  node_ip: 192.168.1.100
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,37,1068.59,37000,0,0,0,331.54


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-06_13-01-12
  done: false
  episode_len_mean: 331.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 113
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.10894718170166
          entropy_coeff: 0.009999999999999998
          kl: 0.010579781437267123
          policy_loss: -0.024451651258601083
          total_loss: -0.041768253677421145
          vf_explained_var: -0.9145022630691528
          vf_loss: 0.000598932343806761
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_restore: 38
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,38,1094.82,38000,0,0,0,331.99


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-06_13-01-39
  done: false
  episode_len_mean: 332.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 4
  episodes_total: 117
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0892696804470487
          entropy_coeff: 0.009999999999999998
          kl: 0.009036276780107579
          policy_loss: -0.07493243370619085
          total_loss: -0.09257437605410815
          vf_explained_var: -0.9231986403465271
          vf_loss: 0.000539873615748042
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,39,1122.51,39000,0,0,0,332.24




Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-06_13-02-21
  done: false
  episode_len_mean: 332.08
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 120
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.1570902877383764
          entropy_coeff: 0.009999999999999998
          kl: 0.008236065249003843
          policy_loss: -0.06079000437425242
          total_loss: -0.07912651745395528
          vf_explained_var: -0.7133936285972595
          vf_loss: 0.000763570097883025
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,40,1164.38,40000,0,0,0,332.08


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-06_13-02-49
  done: false
  episode_len_mean: 332.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 123
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3250815682941015
          entropy_coeff: 0.009999999999999998
          kl: 0.011092340554225031
          policy_loss: -0.05347403647998969
          total_loss: -0.07279929456611474
          vf_explained_var: -0.215880885720253
          vf_loss: 0.0005978553599561565
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,41,1192.34,41000,0,0,0,332.43


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-06_13-03-16
  done: false
  episode_len_mean: 332.77
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 126
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.120810332563188
          entropy_coeff: 0.009999999999999998
          kl: 0.012061861135919847
          policy_loss: -0.06035431012925174
          total_loss: -0.07678661919716331
          vf_explained_var: -0.7408331036567688
          vf_loss: 0.0011572327495539664
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,42,1218.94,42000,0,0,0,332.77


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-06_13-03-44
  done: false
  episode_len_mean: 332.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 129
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9153467734654746
          entropy_coeff: 0.009999999999999998
          kl: 0.01085485517591908
          policy_loss: -0.020947319641709327
          total_loss: -0.03575647593372398
          vf_explained_var: -1.0
          vf_loss: 0.0010878545487584131
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
  node_ip: 192.168.1.100
  num_healthy_workers: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,43,1247.04,43000,0,0,0,332.96


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-06_13-04-09
  done: false
  episode_len_mean: 334.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 132
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.1626873705122205
          entropy_coeff: 0.009999999999999998
          kl: 0.011524925953223509
          policy_loss: -0.0027407256265481314
          total_loss: -0.019972376939323213
          vf_explained_var: -0.0691569447517395
          vf_loss: 0.0009377434303233814
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 44
  node_ip: 192.168.1.100
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,44,1272.61,44000,0,0,0,334.4


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-06_13-04-38
  done: false
  episode_len_mean: 334.37
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 135
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8971095893118117
          entropy_coeff: 0.009999999999999998
          kl: 0.016809751043756982
          policy_loss: -0.05067929575840632
          total_loss: -0.06324128276771969
          vf_explained_var: -0.9059434533119202
          vf_loss: 0.001366183002311219
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,45,1301.59,45000,0,0,0,334.37


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-06_13-05-06
  done: false
  episode_len_mean: 335.09
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 138
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.1644340382681952
          entropy_coeff: 0.009999999999999998
          kl: 0.009941847155828616
          policy_loss: -0.013617851605845822
          total_loss: -0.03154214207703868
          vf_explained_var: -1.0
          vf_loss: 0.0007374961330141458
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 46
  node_ip: 192.168.1.100
  num_healthy_workers:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,46,1329.29,46000,0,0,0,335.09


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-06_13-05-44
  done: false
  episode_len_mean: 334.17
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 141
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.552020165655348
          entropy_coeff: 0.009999999999999998
          kl: 0.012966486947421963
          policy_loss: 0.0002511448330349392
          total_loss: -0.005128479583395852
          vf_explained_var: -0.5727739334106445
          vf_loss: 0.0062506320524132915
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
  node_ip: 192.168.1.100
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,47,1366.7,47000,0,0,0,334.17


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-06_13-06-15
  done: false
  episode_len_mean: 333.48
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 144
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0591684367921617
          entropy_coeff: 0.009999999999999998
          kl: 0.010981980479081404
          policy_loss: -0.04011547983520561
          total_loss: -0.05345658043192492
          vf_explained_var: -0.7357677221298218
          vf_loss: 0.003955989455183347
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,48,1398.08,48000,0,0,0,333.48


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-06_13-06-46
  done: false
  episode_len_mean: 333.44
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 147
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0325666533576117
          entropy_coeff: 0.009999999999999998
          kl: 0.019823614067207944
          policy_loss: -0.10895727475484213
          total_loss: -0.12172380089759827
          vf_explained_var: 0.08389554172754288
          vf_loss: 0.0016120561896564645
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49
  node_ip: 192.168.1.100
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,49,1429.36,49000,0,0,0,333.44




Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-06_13-07-29
  done: false
  episode_len_mean: 333.4
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 150
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.252225504981147
          entropy_coeff: 0.009999999999999998
          kl: 0.016479285317874945
          policy_loss: -0.1177473639862405
          total_loss: -0.13404225996798938
          vf_explained_var: 0.31422150135040283
          vf_loss: 0.0012835713530269762
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 192.168.1.100
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,50,1472.4,50000,0,0,0,333.4


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-06_13-07-58
  done: false
  episode_len_mean: 333.88
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 153
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.935144805908203
          entropy_coeff: 0.009999999999999998
          kl: 0.014093096071284113
          policy_loss: -0.09362847014433807
          total_loss: -0.10631914840390284
          vf_explained_var: 0.016667569056153297
          vf_loss: 0.0024328428850923147
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  node_ip: 192.168.1.100
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,51,1501.29,51000,0,0,0,333.88


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-06_13-08-29
  done: false
  episode_len_mean: 335.2
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 156
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0375153753492565
          entropy_coeff: 0.009999999999999998
          kl: 0.015136936425567448
          policy_loss: -0.08249399620625708
          total_loss: -0.0969814233481884
          vf_explained_var: -0.2819005250930786
          vf_loss: 0.0013466459711910122
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52
  node_ip: 192.168.1.100
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,52,1532.17,52000,0,0,0,335.2


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-06_13-08-59
  done: false
  episode_len_mean: 335.72
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 159
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9296559797392951
          entropy_coeff: 0.009999999999999998
          kl: 0.013098978658695692
          policy_loss: -0.10042593197690117
          total_loss: -0.11470966852373547
          vf_explained_var: -0.8627484440803528
          vf_loss: 0.001083129344947843
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_restore: 53
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,53,1561.84,53000,0,0,0,335.72


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-06_13-09-30
  done: false
  episode_len_mean: 336.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 162
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9911303149329291
          entropy_coeff: 0.009999999999999998
          kl: 0.012616515569734648
          policy_loss: -0.08666903773943584
          total_loss: -0.1017064266734653
          vf_explained_var: -0.6622142195701599
          vf_loss: 0.0010889576592793067
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 54
  node_ip: 192.168.1.100
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,54,1592.77,54000,0,0,0,336.41


Result for PPO_my_env_e6792_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-06_13-10-04
  done: false
  episode_len_mean: 336.13
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 165
  experiment_id: 9621e3adcf6148feac28dfc5a7380f79
  hostname: linar-B360M-D2V
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.009744980600145
          entropy_coeff: 0.009999999999999998
          kl: 0.01235479926881623
          policy_loss: -0.021569707265330685
          total_loss: -0.037243089597258305
          vf_explained_var: -0.0017711579566821456
          vf_loss: 0.0007176288096363552
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 55
  node_ip: 192.168.1.100
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_e6792_00000,RUNNING,192.168.1.100:1133330,55,1627.18,55000,0,0,0,336.13
