In [1]:
#!pip3 install ray torch torchvision tabulate tensorboard
#!pip3 install 'ray[rllib]'
#!pip3 install ray

In [2]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

from models import VisualEncoder
from train import *



In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: a pretrained ``VisualEncoder`` with linear policy and value heads.

    The encoder weights are loaded from a fixed checkpoint path on disk.
    ``forward`` returns the action logits and caches the value estimate of the
    same batch so that ``value_function`` can return it afterwards.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder and both heads, loading the pretrained encoder weights.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2``.
            action_space: Action space; ``action_space.n`` sets the width of
                the action head, so it must expose ``n``.
            num_outputs: Forwarded to ``TorchModelV2``.
            model_config: Forwarded to ``TorchModelV2``.
            name: Forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512
        self.encoder = VisualEncoder(features_dim)
        # Load the checkpoint onto the CPU first so it can be read on a
        # machine without a GPU; the modules are moved to CUDA below if present.
        self.encoder.load_state_dict(
            torch.load("/src/Visual Autoencoder weights and models/IGLU_encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Value estimate of the most recent forward() batch (None until then).
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits for a batch and cache its value estimates.

        Args:
            input_dict: Must contain ``'obs'``, a rank-4 tensor.
                NOTE(review): presumably (batch, height, width, channels)
                images with values in 0..255 -- confirm against the env wrapper.
            state: Returned unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(action_logits, state)``.
        """
        obs = input_dict['obs']
        # Tensor.cuda()/.to() return a new tensor rather than moving in place,
        # so the result has to be kept. Using the device the parameters
        # actually live on keeps the input and the model together even if the
        # model was relocated after construction.
        obs = obs.to(self.action_head.weight.device)
        # Move axis 3 to position 1 and rescale by 1/255.
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        features = self.encoder(obs)
        action = self.action_head(features)
        # Drop the trailing size-1 dimension produced by the value head.
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimates cached by the most recent ``forward`` call.

        Raises:
            AssertionError: If ``forward`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register the model class under the string key "my_torch_model"; the trainer
# config's "custom_model" entry refers to the model by this key.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os

# Pin CUDA device selection for this process via environment variables:
# device ordering by PCI bus id, and only device "0" visible.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "0",
})

def env_creator(env_config):
    """Create the wrapped 'IGLUSilentBuilder-v0' environment.

    Args:
        env_config: Optional mapping of environment settings. Recognised keys:
            ``"max_steps"`` (default ``1000``), passed to ``gym.make``, and
            ``"task_preset"`` (default ``['C22']``), passed to ``TaskSet``
            as ``preset``. ``None`` or an empty mapping selects the defaults.

    Returns:
        The environment wrapped in ``PovOnlyWrapper`` and then
        ``IgluActionWrapper``.
    """
    # Tolerate None so the creator can also be called by hand.
    env_config = env_config or {}
    max_steps = env_config.get("max_steps", 1000)
    task_preset = env_config.get("task_preset", ['C22'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=list(task_preset)))
    env = PovOnlyWrapper(env)
    env = IgluActionWrapper(env)
    return env

from ray.tune.registry import register_env
# Register env_creator under the name "my_env"; the trainer config's "env"
# entry refers to the environment by this name.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Model section: selects the custom model registered as "my_torch_model".
ppo_model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# Weights & Biases project and run name, placed under logger_config["wandb"].
wandb_run_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO C22 pretrained",
}

# Full trainer configuration handed to Tune.
ppo_trainer_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": ppo_model_settings,
    "logger_config": {"wandb": wandb_run_settings},
}

# Launch training. No stop criterion is passed to tune.run here.
# Disabled alternative kept from the original cell:
#   callbacks=[CustomLoggerCallback()]
tune.run(PPOTrainer, config=ppo_trainer_config, loggers=[WandbLogger])



Trial name,status,loc
PPO_my_env_ccae9_00000,PENDING,


2021-09-17 08:54:00,356	INFO wandb.py:170 -- Already logged into W&B.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[2m[36m(pid=177)[0m 2021-09-17 08:54:06,313	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=177)[0m 2021-09-17 08:54:06,313	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=177)[0m 2021-09-17 08:54:32,399	INFO trainable.py:109 -- Trainable.setup took 30.337 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-09-17_08-56-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 2.0
  episode_reward_min: 2.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2638701505131191
          entropy_coeff: 0.009999999999999998
          kl: 0.007371294528260108
          policy_loss: -0.14228328110443222
          total_loss: -0.055882537023474774
          vf_explained_var: -0.2014622986316681
          vf_loss: 0.09756518351948923
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.1.96
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,1,135.274,1000,2,2,2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-09-17_08-57-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3267443339029947
          entropy_coeff: 0.009999999999999998
          kl: 0.0029203277595236444
          policy_loss: -0.1886763532956441
          total_loss: -0.14949229419645335
          vf_explained_var: -0.2841486632823944
          vf_loss: 0.0518674381594691
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.1.96
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,2,150.985,2000,1,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-09-17_08-57-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.3333333333333333
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.0641983840200635
          entropy_coeff: 0.009999999999999998
          kl: 0.009514609790496802
          policy_loss: -0.13647007072965303
          total_loss: -0.09078157742818196
          vf_explained_var: 0.5495882034301758
          vf_loss: 0.05537901787708203
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,3,166.204,3000,1.33333,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-09-17_08-57-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 4
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1945028980573018
          entropy_coeff: 0.009999999999999998
          kl: 0.00492661648780681
          policy_loss: -0.056288139977388914
          total_loss: -0.03694659185906251
          vf_explained_var: -0.05728582665324211
          vf_loss: 0.030793915544119147
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.1.96
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,4,181.41,4000,1,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-09-17_08-57-48
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.2
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 5
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.5039758493502935
          entropy_coeff: 0.009999999999999998
          kl: 0.010314543527916461
          policy_loss: -0.14678724780678748
          total_loss: -0.12443516792522537
          vf_explained_var: 0.11248816549777985
          vf_loss: 0.026876108850248985
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 192.168.1.96
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,5,196.377,5000,1.2,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-09-17_08-58-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.8619716909196642
          entropy_coeff: 0.009999999999999998
          kl: 0.008421977148433946
          policy_loss: -0.15512842055824067
          total_loss: -0.14068603875736396
          vf_explained_var: -0.275292307138443
          vf_loss: 0.02264099792163405
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip: 192.168.1.96
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,6,211.064,6000,1,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-09-17_08-58-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.8571428571428571
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 7
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.828158105744256
          entropy_coeff: 0.009999999999999998
          kl: 0.00547120779050153
          policy_loss: -0.17185391618145837
          total_loss: -0.16623784115331042
          vf_explained_var: -0.3235822021961212
          vf_loss: 0.013624096366887292
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,7,226.004,7000,0.857143,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-09-17_08-58-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 1.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 8
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.35305360158284504
          entropy_coeff: 0.009999999999999998
          kl: 0.0037894813533234303
          policy_loss: -0.18278620996408992
          total_loss: -0.17254396403829256
          vf_explained_var: 0.5013548731803894
          vf_loss: 0.013583308297933803
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 192.168.1.96
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,8,240.77,8000,1,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-09-17_08-58-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.8888888888888888
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 9
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 0.4959735459751553
          entropy_coeff: 0.009999999999999998
          kl: 0.005207974388619855
          policy_loss: -0.15570432568589845
          total_loss: -0.14782057479023933
          vf_explained_var: -0.3123420476913452
          vf_loss: 0.012713286562615798
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,9,254.867,9000,0.888889,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-09-17_08-59-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.8
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 10
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 0.4483474761247635
          entropy_coeff: 0.009999999999999998
          kl: 0.0008588785514054583
          policy_loss: -0.16754555934005314
          total_loss: -0.16417291255460845
          vf_explained_var: -0.3616377115249634
          vf_loss: 0.007834648139153917
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 192.168.1.96

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,10,269.4,10000,0.8,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-09-17_08-59-16
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.7272727272727273
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 11
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.4803800665669971
          entropy_coeff: 0.009999999999999998
          kl: 0.0022142929054215048
          policy_loss: -0.16848707497119902
          total_loss: -0.16770227940546142
          vf_explained_var: -0.277459979057312
          vf_loss: 0.005560916120238188
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,11,283.711,11000,0.727273,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-09-17_08-59-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.6666666666666666
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 12
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.45173031522168056
          entropy_coeff: 0.009999999999999998
          kl: 0.004001243547247668
          policy_loss: -0.17224202288521662
          total_loss: -0.17275281217363145
          vf_explained_var: -0.2397492229938507
          vf_loss: 0.003981506581314736
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,12,298.192,12000,0.666667,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-09-17_08-59-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.6153846153846154
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 13
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.4392571965853373
          entropy_coeff: 0.009999999999999998
          kl: 0.001312520665117805
          policy_loss: -0.17031776673263974
          total_loss: -0.17190882083442477
          vf_explained_var: -0.315489798784256
          vf_loss: 0.002797417849716213
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,13,312.867,13000,0.615385,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-09-17_08-59-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.5714285714285714
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 14
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.4425276666879654
          entropy_coeff: 0.009999999999999998
          kl: 0.001342836949564546
          policy_loss: -0.16975600288973913
          total_loss: -0.1721155012647311
          vf_explained_var: -0.3182525932788849
          vf_loss: 0.002063680479639313
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,14,326.904,14000,0.571429,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-09-17_09-00-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.5333333333333333
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 15
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812499999999998
          cur_lr: 5.000000000000001e-05
          entropy: 0.3317614522245195
          entropy_coeff: 0.009999999999999998
          kl: 0.011419471095434928
          policy_loss: -0.1702460648285018
          total_loss: -0.1719860452744696
          vf_explained_var: -0.33333316445350647
          vf_loss: 0.0015687122889277009
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,15,340.861,15000,0.533333,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-09-17_09-00-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.625
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 16
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812499999999998
          cur_lr: 5.000000000000001e-05
          entropy: 0.5499187615182665
          entropy_coeff: 0.009999999999999998
          kl: 0.029796191280851217
          policy_loss: -0.13256903667416836
          total_loss: -0.13108960866100258
          vf_explained_var: 0.5581504106521606
          vf_loss: 0.006955334433991488
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip: 192.168.1.9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,16,354.885,16000,0.625,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-09-17_09-00-41
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.7058823529411765
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 17
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001171875
          cur_lr: 5.000000000000001e-05
          entropy: 0.5476481669478946
          entropy_coeff: 0.009999999999999998
          kl: 0.009714132739995307
          policy_loss: -0.16685405688153374
          total_loss: -0.16519847677813637
          vf_explained_var: 0.5546317100524902
          vf_loss: 0.007120680441019229
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore: 17
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,17,368.921,17000,0.705882,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-09-17_09-00-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.6666666666666666
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 18
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001171875
          cur_lr: 5.000000000000001e-05
          entropy: 0.41486725012461345
          entropy_coeff: 0.009999999999999998
          kl: 0.0022755200169686735
          policy_loss: -0.12327586867743068
          total_loss: -0.12263859469029638
          vf_explained_var: -0.34633469581604004
          vf_loss: 0.004783279329745306
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,18,382.464,18000,0.666667,2,0,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-09-17_09-01-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.5789473684210527
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 19
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005859375
          cur_lr: 5.000000000000001e-05
          entropy: 0.5045332570870718
          entropy_coeff: 0.009999999999999998
          kl: 0.0060724824405753
          policy_loss: -0.12294840498103035
          total_loss: -0.11282028042607838
          vf_explained_var: 0.35318514704704285
          vf_loss: 0.015169900636505595
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,19,396.464,19000,0.578947,2,-1,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-09-17_09-01-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.45
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 20
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005859375
          cur_lr: 5.000000000000001e-05
          entropy: 1.001783232556449
          entropy_coeff: 0.009999999999999998
          kl: 0.07560953322294447
          policy_loss: 0.08567170715994304
          total_loss: 0.10726093161437246
          vf_explained_var: 0.6416773200035095
          vf_loss: 0.03156275768867797
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 192.168.1.96
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,20,410.554,20000,0.45,2,-2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-09-17_09-01-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.42857142857142855
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 21
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008789062500000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.1558389630582597
          entropy_coeff: 0.009999999999999998
          kl: 0.034036156612104504
          policy_loss: -0.017310090073280864
          total_loss: 0.005914414218730396
          vf_explained_var: 0.19238783419132233
          vf_loss: 0.034752979760782586
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,21,424.696,21000,0.428571,2,-2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-09-17_09-01-51
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.5
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 22
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013183593749999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.099529633257124
          entropy_coeff: 0.009999999999999998
          kl: 0.03837734177244413
          policy_loss: -0.23386686763001813
          total_loss: -0.22659129401048025
          vf_explained_var: 0.6783127188682556
          vf_loss: 0.018220275888840357
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  node_ip: 192.168.1.96
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,22,438.284,22000,0.5,2,-2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-09-17_09-02-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.5652173913043478
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019775390625
          cur_lr: 5.000000000000001e-05
          entropy: 1.1219972531000773
          entropy_coeff: 0.009999999999999998
          kl: 0.026085655658302738
          policy_loss: -0.09167659220596154
          total_loss: -0.07817328551577198
          vf_explained_var: 0.7809060215950012
          vf_loss: 0.0246716967318207
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,23,451.924,23000,0.565217,2,-2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-09-17_09-02-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.625
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 24
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029663085937500014
          cur_lr: 5.000000000000001e-05
          entropy: 0.9722165875964695
          entropy_coeff: 0.009999999999999998
          kl: 0.03491051173355623
          policy_loss: -0.11382555663585663
          total_loss: -0.10519747601615058
          vf_explained_var: 0.3942827582359314
          vf_loss: 0.018246692211121424
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  node_ip: 192.168.1.9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,24,465.323,24000,0.625,2,-2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-09-17_09-02-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.68
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 25
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004449462890624999
          cur_lr: 5.000000000000001e-05
          entropy: 0.5410435696442922
          entropy_coeff: 0.009999999999999998
          kl: 0.003997754675962284
          policy_loss: -0.12563635839356316
          total_loss: -0.1239927505246467
          vf_explained_var: -0.0812818855047226
          vf_loss: 0.007036255724314187
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  node_ip: 192.168.1.96

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,25,478.832,25000,0.68,2,-2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-09-17_09-02-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.6538461538461539
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 26
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022247314453124993
          cur_lr: 5.000000000000001e-05
          entropy: 0.8724494652615653
          entropy_coeff: 0.009999999999999998
          kl: 0.03265438811278736
          policy_loss: -0.20590276138650046
          total_loss: -0.17742193076345655
          vf_explained_var: 0.6193729043006897
          vf_loss: 0.03713267985213962
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,26,492.203,26000,0.653846,2,-2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-09-17_09-02-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.7037037037037037
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 27
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00333709716796875
          cur_lr: 5.000000000000001e-05
          entropy: 0.5583961400720808
          entropy_coeff: 0.009999999999999998
          kl: 0.0026772820438552003
          policy_loss: -0.011963810357782576
          total_loss: -0.011113236678971185
          vf_explained_var: 0.9051923751831055
          vf_loss: 0.0064255994149587225
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,27,505.756,27000,0.703704,2,-2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-09-17_09-03-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.6785714285714286
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001668548583984375
          cur_lr: 5.000000000000001e-05
          entropy: 0.7061865376101599
          entropy_coeff: 0.009999999999999998
          kl: 0.005068314965082739
          policy_loss: -0.16883482897861135
          total_loss: -0.15232352591637108
          vf_explained_var: 0.5424697399139404
          vf_loss: 0.02356471338102387
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,28,519.399,28000,0.678571,2,-2,1000


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-09-17_09-03-26
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.6551724137931034
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 29
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001668548583984375
          cur_lr: 5.000000000000001e-05
          entropy: 0.9651034759150611
          entropy_coeff: 0.009999999999999998
          kl: 0.008039568173862774
          policy_loss: -0.19940556548535823
          total_loss: -0.19593876428488227
          vf_explained_var: 0.29955098032951355
          vf_loss: 0.013104423236412307
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,29,532.921,29000,0.655172,2,-2,1000




Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-09-17_09-03-57
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.6333333333333333
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 30
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001668548583984375
          cur_lr: 5.000000000000001e-05
          entropy: 0.9800361189577315
          entropy_coeff: 0.009999999999999998
          kl: 0.009448464897438037
          policy_loss: -0.05048242857058843
          total_loss: -0.053174827992916104
          vf_explained_var: 0.5123451352119446
          vf_loss: 0.00709219681771679
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,30,563.68,30000,0.633333,2,-2,996.1


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-09-17_09-04-14
  done: false
  episode_len_mean: 996.2258064516129
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.6129032258064516
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 31
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001668548583984375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2579226202434963
          entropy_coeff: 0.009999999999999998
          kl: 0.018370994522461753
          policy_loss: 0.0061354890051815245
          total_loss: 0.0002695647378762563
          vf_explained_var: -0.2760816216468811
          vf_loss: 0.006682649190123711
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,31,580.734,31000,0.612903,2,-2,996.226


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-09-17_09-04-29
  done: false
  episode_len_mean: 996.34375
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.59375
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 32
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001668548583984375
          cur_lr: 5.000000000000001e-05
          entropy: 1.52997324930297
          entropy_coeff: 0.009999999999999998
          kl: 0.013705651420265141
          policy_loss: -0.06156104418138663
          total_loss: -0.06858886174029774
          vf_explained_var: -0.1084279790520668
          vf_loss: 0.008249044956432448
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,32,595.557,32000,0.59375,2,-2,996.344


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-09-17_09-04-44
  done: false
  episode_len_mean: 996.4545454545455
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.6363636363636364
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 33
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001668548583984375
          cur_lr: 5.000000000000001e-05
          entropy: 1.5048811793327332
          entropy_coeff: 0.009999999999999998
          kl: 0.01766805714609983
          policy_loss: 0.05744072389271524
          total_loss: 0.09846798450582557
          vf_explained_var: 0.47142961621284485
          vf_loss: 0.056046592221698825
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_restore: 33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,33,610.503,33000,0.636364,2,-2,996.455


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-09-17_09-04-58
  done: false
  episode_len_mean: 996.5588235294117
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.5588235294117647
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 34
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001668548583984375
          cur_lr: 5.000000000000001e-05
          entropy: 0.9909116268157959
          entropy_coeff: 0.009999999999999998
          kl: 0.020540787177666706
          policy_loss: -0.10518386099073622
          total_loss: -0.043079135070244474
          vf_explained_var: 0.31721174716949463
          vf_loss: 0.07197956755343411
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,34,625.134,34000,0.558824,2,-2,996.559


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-09-17_09-05-13
  done: false
  episode_len_mean: 996.6571428571428
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.5428571428571428
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 35
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 1.1980020549562242
          entropy_coeff: 0.009999999999999998
          kl: 0.014988034514400539
          policy_loss: 0.04083139565255907
          total_loss: 0.06585677787661552
          vf_explained_var: 0.10480692237615585
          vf_loss: 0.03696789021293322
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_restore: 35


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,35,640.172,35000,0.542857,2,-2,996.657


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-09-17_09-05-28
  done: false
  episode_len_mean: 996.75
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.5
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 36
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 1.2688809441195594
          entropy_coeff: 0.009999999999999998
          kl: 0.022416949022988162
          policy_loss: 0.025615384595261678
          total_loss: 0.11645603742864397
          vf_explained_var: 0.31916046142578125
          vf_loss: 0.10347335769070519
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36
  node_ip: 192.168.1.96
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,36,654.955,36000,0.5,2,-2,996.75


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-09-17_09-05-43
  done: false
  episode_len_mean: 996.8378378378378
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.43243243243243246
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 37
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0037542343139648424
          cur_lr: 5.000000000000001e-05
          entropy: 1.3352697014808654
          entropy_coeff: 0.009999999999999998
          kl: 0.019109469266848065
          policy_loss: -0.010520291659567091
          total_loss: 0.04413586917022864
          vf_explained_var: -0.029090436175465584
          vf_loss: 0.06793711843589942
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,37,669.564,37000,0.432432,2,-2,996.838


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-09-17_09-05-58
  done: false
  episode_len_mean: 996.921052631579
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.42105263157894735
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 38
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0037542343139648424
          cur_lr: 5.000000000000001e-05
          entropy: 1.423187584347195
          entropy_coeff: 0.009999999999999998
          kl: 0.012306355295905015
          policy_loss: 0.0008854208721054925
          total_loss: 0.10031969083680047
          vf_explained_var: 0.2470291405916214
          vf_loss: 0.1136199451982975
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_restore: 38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,38,684.808,38000,0.421053,2,-2,996.921


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-09-17_09-06-13
  done: false
  episode_len_mean: 997.0
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.4358974358974359
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 39
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0037542343139648424
          cur_lr: 5.000000000000001e-05
          entropy: 1.0268018358283573
          entropy_coeff: 0.009999999999999998
          kl: 0.007222873609032337
          policy_loss: -0.1643661199344529
          total_loss: -0.16513575828737684
          vf_explained_var: 0.05441375449299812
          vf_loss: 0.009471259998261101
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,39,699.429,39000,0.435897,2,-2,997


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-09-17_09-06-28
  done: false
  episode_len_mean: 997.075
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.425
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 40
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0037542343139648424
          cur_lr: 5.000000000000001e-05
          entropy: 1.2724582890669505
          entropy_coeff: 0.009999999999999998
          kl: 0.020299708089104584
          policy_loss: -0.06701360613935524
          total_loss: -0.0738712535964118
          vf_explained_var: -0.6941999793052673
          vf_loss: 0.005790725600026134
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  node_ip: 192.168.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,40,714.308,40000,0.425,2,-2,997.075


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-09-17_09-06-43
  done: false
  episode_len_mean: 997.1463414634146
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.4146341463414634
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 41
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005631351470947267
          cur_lr: 5.000000000000001e-05
          entropy: 1.2221750464704302
          entropy_coeff: 0.009999999999999998
          kl: 0.012888935152703675
          policy_loss: 0.014162208181288508
          total_loss: 0.16635196142726474
          vf_explained_var: 0.3370513319969177
          vf_loss: 0.16433891728520394
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,41,729.529,41000,0.414634,2,-2,997.146


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-09-17_09-06-57
  done: false
  episode_len_mean: 997.2142857142857
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.4523809523809524
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 42
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005631351470947267
          cur_lr: 5.000000000000001e-05
          entropy: 1.1311080932617188
          entropy_coeff: 0.009999999999999998
          kl: 0.011723252714345397
          policy_loss: -0.057966456727849114
          total_loss: 0.08820444003989299
          vf_explained_var: 0.423430860042572
          vf_loss: 0.15741596354378595
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,42,743.92,42000,0.452381,2,-2,997.214


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-09-17_09-07-12
  done: false
  episode_len_mean: 997.2790697674419
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.4418604651162791
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 43
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005631351470947267
          cur_lr: 5.000000000000001e-05
          entropy: 1.14967283738984
          entropy_coeff: 0.009999999999999998
          kl: 0.006625710235183549
          policy_loss: -0.09162064178122414
          total_loss: -0.05439729541540146
          vf_explained_var: 0.1865161955356598
          vf_loss: 0.04868276430190437
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,43,758.607,43000,0.44186,2,-2,997.279


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-09-17_09-07-27
  done: false
  episode_len_mean: 997.3409090909091
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.4318181818181818
  episode_reward_min: -2.0
  episodes_this_iter: 1
  episodes_total: 44
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005631351470947267
          cur_lr: 5.000000000000001e-05
          entropy: 1.5334487398465475
          entropy_coeff: 0.009999999999999998
          kl: 0.024723269552412653
          policy_loss: 0.07296109489268726
          total_loss: 0.1261789468427499
          vf_explained_var: 0.48996394872665405
          vf_loss: 0.0684131158515811
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 44
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,44,773.087,44000,0.431818,2,-2,997.341


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-09-17_09-07-42
  done: false
  episode_len_mean: 997.4
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.3111111111111111
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 45
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0084470272064209
          cur_lr: 5.000000000000001e-05
          entropy: 1.5882663144005669
          entropy_coeff: 0.009999999999999998
          kl: 0.01239336890344597
          policy_loss: 0.0478071649869283
          total_loss: 0.15045236899620956
          vf_explained_var: 0.34111928939819336
          vf_loss: 0.11842317655682563
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,45,788.124,45000,0.311111,2,-5,997.4


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-09-17_09-07-57
  done: false
  episode_len_mean: 997.4565217391304
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.30434782608695654
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 46
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0084470272064209
          cur_lr: 5.000000000000001e-05
          entropy: 1.2607011801666683
          entropy_coeff: 0.009999999999999998
          kl: 0.018534425575501116
          policy_loss: -0.031787022948265076
          total_loss: 0.0060950517654418945
          vf_explained_var: 0.39898261427879333
          vf_loss: 0.05033252677725007
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,46,803.181,46000,0.304348,2,-5,997.457


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-09-17_09-08-12
  done: false
  episode_len_mean: 997.5106382978723
  episode_media: {}
  episode_reward_max: 2.0
  episode_reward_mean: 0.2978723404255319
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 47
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0084470272064209
          cur_lr: 5.000000000000001e-05
          entropy: 1.7163839750819736
          entropy_coeff: 0.009999999999999998
          kl: 0.013837920375420589
          policy_loss: 0.02865282907668087
          total_loss: 0.10785167631175784
          vf_explained_var: -0.07974981516599655
          vf_loss: 0.09624579610923926
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,47,818.322,47000,0.297872,2,-5,997.511


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-09-17_09-08-27
  done: false
  episode_len_mean: 997.5625
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.3541666666666667
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 48
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0084470272064209
          cur_lr: 5.000000000000001e-05
          entropy: 1.5505958623356288
          entropy_coeff: 0.009999999999999998
          kl: 0.021654661064537083
          policy_loss: 0.03991184590591325
          total_loss: 0.04953656105531586
          vf_explained_var: 0.4058457612991333
          vf_loss: 0.024947756298610735
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,48,833.275,48000,0.354167,3,-5,997.562


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-09-17_09-08-42
  done: false
  episode_len_mean: 997.6122448979592
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.3673469387755102
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 49
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 1.6666306376457214
          entropy_coeff: 0.009999999999999998
          kl: 0.015180862022473837
          policy_loss: -0.047956634602612916
          total_loss: 0.049948445997304386
          vf_explained_var: 0.2171301692724228
          vf_loss: 0.1143790375182612
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,49,847.795,49000,0.367347,3,-5,997.612


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-09-17_09-08-56
  done: false
  episode_len_mean: 997.66
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.34
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 50
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 1.3756437804963855
          entropy_coeff: 0.009999999999999998
          kl: 0.013339462082201298
          policy_loss: 0.08645544995864232
          total_loss: 0.17211947329342364
          vf_explained_var: 0.2737703025341034
          vf_loss: 0.0992514437271489
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 192.168.1.96
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,50,862.364,50000,0.34,3,-5,997.66


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-09-17_09-09-11
  done: false
  episode_len_mean: 997.7058823529412
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.3333333333333333
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 51
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 1.683243719736735
          entropy_coeff: 0.009999999999999998
          kl: 0.0192242976854572
          policy_loss: -0.006373980848325624
          total_loss: 0.018687032784024876
          vf_explained_var: 0.6795833706855774
          vf_loss: 0.041649866311086546
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,51,877.592,51000,0.333333,3,-5,997.706


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-09-17_09-09-27
  done: false
  episode_len_mean: 997.75
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.2692307692307692
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 52
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 2.1112701972325643
          entropy_coeff: 0.009999999999999998
          kl: 0.010377769951912728
          policy_loss: 0.046113262263437114
          total_loss: 0.04767000178496043
          vf_explained_var: -0.08146780729293823
          vf_loss: 0.022537948987964126
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,52,892.656,52000,0.269231,3,-5,997.75


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-09-17_09-09-42
  done: false
  episode_len_mean: 997.7924528301887
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.24528301886792453
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 53
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 1.6024000260565017
          entropy_coeff: 0.009999999999999998
          kl: 0.013060612571438708
          policy_loss: -0.00039494501219855413
          total_loss: 0.002832987904548645
          vf_explained_var: -0.3462499976158142
          vf_loss: 0.019086447546255336
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,53,907.758,53000,0.245283,3,-5,997.792


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-09-17_09-09-56
  done: false
  episode_len_mean: 997.8333333333334
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.25925925925925924
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 54
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 1.7409509036276076
          entropy_coeff: 0.009999999999999998
          kl: 0.015094905029495169
          policy_loss: 0.03577265996072027
          total_loss: 0.06783842742443084
          vf_explained_var: -0.037606339901685715
          vf_loss: 0.049284014597328174
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,54,922.173,54000,0.259259,3,-5,997.833


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-09-17_09-10-11
  done: false
  episode_len_mean: 997.8727272727273
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.2909090909090909
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 55
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012670540809631342
          cur_lr: 5.000000000000001e-05
          entropy: 2.1473966399828592
          entropy_coeff: 0.009999999999999998
          kl: 0.02088398168716823
          policy_loss: 0.00046231779787275524
          total_loss: 0.039526440906855795
          vf_explained_var: 0.5066514611244202
          vf_loss: 0.06027347942710751
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,55,936.744,55000,0.290909,3,-5,997.873


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-09-17_09-10-26
  done: false
  episode_len_mean: 997.9107142857143
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.32142857142857145
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019005811214447027
          cur_lr: 5.000000000000001e-05
          entropy: 2.119206182161967
          entropy_coeff: 0.009999999999999998
          kl: 0.01888081276414457
          policy_loss: -0.07501299074954457
          total_loss: -0.05673553273081779
          vf_explained_var: 0.3690633475780487
          vf_loss: 0.03911067712017231
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_restore: 56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,56,951.757,56000,0.321429,3,-5,997.911


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-09-17_09-10-41
  done: false
  episode_len_mean: 997.9473684210526
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.2631578947368421
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 57
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019005811214447027
          cur_lr: 5.000000000000001e-05
          entropy: 2.1940232541826035
          entropy_coeff: 0.009999999999999998
          kl: 0.014459323826784025
          policy_loss: 0.07191730116804441
          total_loss: 0.15146883034871683
          vf_explained_var: 0.11930849403142929
          vf_loss: 0.10121695428258842
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,57,966.831,57000,0.263158,3,-5,997.947


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-09-17_09-10-56
  done: false
  episode_len_mean: 997.9827586206897
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.29310344827586204
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 58
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019005811214447027
          cur_lr: 5.000000000000001e-05
          entropy: 2.002727544307709
          entropy_coeff: 0.009999999999999998
          kl: 0.06078847841760319
          policy_loss: 0.12763537681765028
          total_loss: 0.11644090306427744
          vf_explained_var: 0.48857319355010986
          vf_loss: 0.0076774700493034385
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,58,981.512,58000,0.293103,3,-5,997.983


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-09-17_09-11-11
  done: false
  episode_len_mean: 998.0169491525423
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.288135593220339
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 59
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028508716821670534
          cur_lr: 5.000000000000001e-05
          entropy: 2.367446684837341
          entropy_coeff: 0.009999999999999998
          kl: 0.021795890059411724
          policy_loss: 0.0523942273731033
          total_loss: 0.18308414405004847
          vf_explained_var: 0.15476839244365692
          vf_loss: 0.1537430095175902
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,59,996.693,59000,0.288136,3,-5,998.017




Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-09-17_09-11-42
  done: false
  episode_len_mean: 995.8333333333334
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.23333333333333334
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 60
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0427630752325058
          cur_lr: 5.000000000000001e-05
          entropy: 2.300815212726593
          entropy_coeff: 0.009999999999999998
          kl: 0.013297160648327984
          policy_loss: 0.08462678185767597
          total_loss: 0.11545389542977015
          vf_explained_var: 0.17279617488384247
          vf_loss: 0.053266639029607175
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,60,1028.11,60000,0.233333,3,-5,995.833


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-09-17_09-12-00
  done: false
  episode_len_mean: 995.9016393442623
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.22950819672131148
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0427630752325058
          cur_lr: 5.000000000000001e-05
          entropy: 2.327588857544793
          entropy_coeff: 0.009999999999999998
          kl: 0.015096379353695157
          policy_loss: 0.028094798367884425
          total_loss: 0.17307958321438896
          vf_explained_var: 0.2852250933647156
          vf_loss: 0.1676151064534982
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,61,1045.69,61000,0.229508,3,-5,995.902


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-09-17_09-12-15
  done: false
  episode_len_mean: 995.9677419354839
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.24193548387096775
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 62
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0427630752325058
          cur_lr: 5.000000000000001e-05
          entropy: 2.4021497276094226
          entropy_coeff: 0.009999999999999998
          kl: 0.04013951747441043
          policy_loss: 0.011018658429384232
          total_loss: 0.08925801254808903
          vf_explained_var: -0.12355893105268478
          vf_loss: 0.10054436191502544
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_restore: 62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,62,1060.47,62000,0.241935,3,-5,995.968


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-09-17_09-12-30
  done: false
  episode_len_mean: 996.031746031746
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.2698412698412698
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 63
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06414461284875872
          cur_lr: 5.000000000000001e-05
          entropy: 2.530714217821757
          entropy_coeff: 0.009999999999999998
          kl: 0.028014880789343857
          policy_loss: 0.04063880424946546
          total_loss: 0.03212589598778221
          vf_explained_var: -0.3227521777153015
          vf_loss: 0.014997230496050583
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 63
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,63,1075.73,63000,0.269841,3,-5,996.032


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-09-17_09-12-45
  done: false
  episode_len_mean: 996.09375
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.25
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 64
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09621691927313802
          cur_lr: 5.000000000000001e-05
          entropy: 2.0882544848654003
          entropy_coeff: 0.009999999999999998
          kl: 0.013362699800084154
          policy_loss: 0.09828603019316991
          total_loss: 0.11111870276638203
          vf_explained_var: -0.03814312070608139
          vf_loss: 0.0324295015177793
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_ip: 192.168.1.96

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,64,1091.07,64000,0.25,3,-5,996.094


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-09-17_09-13-00
  done: false
  episode_len_mean: 996.1538461538462
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.24615384615384617
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 65
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09621691927313802
          cur_lr: 5.000000000000001e-05
          entropy: 2.454525521066454
          entropy_coeff: 0.009999999999999998
          kl: 0.016052547004071768
          policy_loss: -0.020937303288115397
          total_loss: 0.0569317452609539
          vf_explained_var: -0.11801651120185852
          vf_loss: 0.10086977941294512
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_restore: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,65,1105.96,65000,0.246154,3,-5,996.154


Result for PPO_my_env_ccae9_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-09-17_09-13-15
  done: false
  episode_len_mean: 996.2121212121212
  episode_media: {}
  episode_reward_max: 3.0
  episode_reward_mean: 0.2727272727272727
  episode_reward_min: -5.0
  episodes_this_iter: 1
  episodes_total: 66
  experiment_id: 92f384b1516d4da09515d85b259ef30e
  hostname: cdsserver
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09621691927313802
          cur_lr: 5.000000000000001e-05
          entropy: 2.242608322037591
          entropy_coeff: 0.009999999999999998
          kl: 0.02377127594185347
          policy_loss: 0.04443291384312842
          total_loss: 0.059659829032089974
          vf_explained_var: 0.1345871090888977
          vf_loss: 0.035365797069648076
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ccae9_00000,RUNNING,192.168.1.96:177,66,1120.82,66000,0.272727,3,-5,996.212
