In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Six-stage convolutional encoder whose output is a flattened feature map.

    Each stage is a 2x2 convolution with stride 2 and no padding followed by
    an ELU, so every stage halves both spatial dimensions (rounding down).
    Channel widths go 3 -> 32 -> 32 -> 64 -> 128 -> 256 -> 512.
    """

    def __init__(self):
        super().__init__()

        channel_widths = (3, 32, 32, 64, 128, 256, 512)
        stages = []
        for in_ch, out_ch in zip(channel_widths[:-1], channel_widths[1:]):
            # Convolution first, activation second: this keeps the convolutions
            # at the even Sequential indices (0, 2, ..., 10), which are the
            # names used as state-dict keys.
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=2, stride=2, padding=0))
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the convolutional stack to ``x`` and return the flattened result."""
        return self.cnn(x)

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: pretrained CNN encoder feeding a trainable MLP.

    The encoder weights are loaded from disk and the encoder is evaluated
    under ``torch.no_grad()``, so no gradient flows into it through
    ``forward``. The MLP trunk is shared by two linear heads: one producing
    ``action_space.n`` action logits and one producing a scalar value
    estimate.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder, MLP trunk and the two output heads.

        Args:
            obs_space, action_space, num_outputs, model_config, name:
                forwarded unchanged to ``TorchModelV2.__init__``.
                ``action_space`` must expose ``.n`` (number of discrete
                actions), which sizes the action head.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        # Width of the flattened encoder output fed into the first MLP layer.
        features_dim = 512
        self.encoder = VisualEncoder()
        # Load on CPU first so the checkpoint can be read on machines without
        # a GPU; the module is moved to CUDA below when available.
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.mlp = nn.Sequential(
            nn.Linear(features_dim, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
        )
        self.action_head = nn.Linear(256, action_space.n)
        self.value_head = nn.Linear(256, 1)
        # Value estimate cached by the most recent forward() call.
        self.last_value = None

        # Kept as a public attribute for backward compatibility; forward()
        # no longer relies on it and instead follows the parameters' device.
        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.mlp.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: mapping whose ``'obs'`` entry is a 4-D tensor that is
                permuted with ``(0, 3, 1, 2)`` before the convolutions
                (presumably batch, height, width, channel with 8-bit pixel
                values, given the division by 255 - confirm against the
                environment wrappers).
            state: passed through unchanged.
            seq_lens: unused.

        Returns:
            ``(logits, state)`` where ``logits`` has one column per action.
        """
        # Tensor.cuda()/Tensor.to() return a new tensor rather than moving in
        # place, so the result must be kept. The target device is read from
        # the weights themselves so this stays correct even if the module is
        # moved after construction.
        device = next(self.encoder.parameters()).device
        obs = input_dict['obs'].to(device)
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        # The encoder is used as a fixed feature extractor: no graph is built
        # for it, so only the MLP and the heads receive gradients.
        with torch.no_grad():
            features = self.encoder(obs)
        features = self.mlp(features)
        action = self.action_head(features)
        # Shape (batch, 1) -> (batch,), as returned by value_function().
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the latest ``forward`` call.

        Raises:
            AssertionError: if ``forward`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the name that the trainer config's
# "custom_model" entry refers to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
# Order CUDA devices by PCI bus id so that index 0 below refers to a fixed
# physical device.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
# Expose only device 0 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that turns a reward of exactly zero into ``-0.01``.

    Any other reward is passed through untouched. Construction is inherited
    from ``gym.RewardWrapper`` and takes the environment to wrap.
    """

    def reward(self, rew):
        """Return ``-0.01`` when ``rew`` equals zero, otherwise ``rew`` itself."""
        return -0.01 if rew == 0 else rew
    
def env_creator(env_config):
    """Create the wrapped ``IGLUSilentBuilder-v0`` environment.

    Args:
        env_config: optional mapping of overrides; may be empty or ``None``.
            Recognised keys:
              * ``"max_steps"``: step limit passed to ``gym.make``
                (default ``1000``).
              * ``"task_preset"``: list of task ids given to ``TaskSet``
                (default ``['C32']``).
            With no overrides the environment is built exactly as before.

    Returns:
        The environment wrapped, in order, by ``PovOnlyWrapper``,
        ``SelectAndPlace``, ``Discretization`` and ``RewardWrapper``.
    """
    # Previously this argument was ignored; fall back to the former
    # hard-coded values so existing callers see no change.
    overrides = env_config or {}
    max_steps = overrides.get("max_steps", 1000)
    task_preset = overrides.get("task_preset", ['C32'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    env = RewardWrapper(env)
    return env

from ray.tune.registry import register_env
# Make env_creator available under the name used by the trainer config's
# "env" entry.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [6]:
from ray.tune.integration.wandb import WandbLogger

# Model section: selects the custom model registered as "my_torch_model".
model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# Weights & Biases project and run name, read by WandbLogger.
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name":  "PPO C32 pretrained (frozen AngelaCNN + MLP) (3 noops after placement) r: -0.01",
}

# Trainer configuration; "gamma" is deliberately not set here.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

analysis = tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])



Trial name,status,loc
PPO_my_env_aa44d_00000,PENDING,


2021-10-23 19:44:41,765	INFO wandb.py:170 -- Already logged into W&B.
2021-10-23 19:44:41,777	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=343315)[0m 2021-10-23 19:44:45,328	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=343315)[0m 2021-10-23 19:44:45,328	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-23_19-45-50
  done: false
  episode_len_mean: 387.0
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -3.8699999999999615
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.878217119640774
          entropy_coeff: 0.009999999999999998
          kl: 0.009090882563476023
          policy_loss: 0.030945457393924396
          total_loss: 0.006289673762189018
          vf_explained_var: 0.16710948944091797
          vf_loss: 0.0023082126488184764
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,1,59.236,1000,-3.87,-3.86,-3.88,387


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-23_19-46-14
  done: false
  episode_len_mean: 398.4
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -5.127999999999959
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 5
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8717774285210504
          entropy_coeff: 0.009999999999999998
          kl: 0.007649655589335996
          policy_loss: 0.022169906728797487
          total_loss: 0.2890206274886926
          vf_explained_var: -0.033184196799993515
          vf_loss: 0.29403856550860735
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,2,83.2506,2000,-5.128,-3.86,-9.86,398.4


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-23_19-46-35
  done: false
  episode_len_mean: 401.7142857142857
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.834285714285672
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 7
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.866729998588562
          entropy_coeff: 0.009999999999999998
          kl: 0.007617120640092286
          policy_loss: 0.0348946451726887
          total_loss: 0.019126118885146245
          vf_explained_var: -0.21593870222568512
          vf_loss: 0.011375348306157523
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 30

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,3,104.441,3000,-4.83429,-3.86,-9.86,401.714


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-23_19-46-55
  done: false
  episode_len_mean: 402.1111111111111
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.656666666666625
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8545502185821534
          entropy_coeff: 0.009999999999999998
          kl: 0.007762734919247608
          policy_loss: -0.05023912841247188
          total_loss: -0.0650040199359258
          vf_explained_var: 0.41888394951820374
          vf_loss: 0.012228064358027445
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,4,124.163,4000,-4.65667,-3.86,-9.86,402.111


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-23_19-47-15
  done: false
  episode_len_mean: 406.3333333333333
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.539999999999957
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 12
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8450461864471435
          entropy_coeff: 0.009999999999999998
          kl: 0.008237960288403349
          policy_loss: 0.010300463934739431
          total_loss: -0.00767212708791097
          vf_explained_var: 0.4035176634788513
          vf_loss: 0.00883028249308053
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,5,144.23,5000,-4.54,-3.86,-9.86,406.333


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-23_19-47-35
  done: false
  episode_len_mean: 409.14285714285717
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.4999999999999565
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 14
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.834255743026733
          entropy_coeff: 0.009999999999999998
          kl: 0.0067657954882664965
          policy_loss: -0.07383443415164948
          total_loss: -0.09498233000437419
          vf_explained_var: 0.5223467946052551
          vf_loss: 0.00584150311899268
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,6,164.037,6000,-4.5,-3.86,-9.86,409.143


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-23_19-47-55
  done: false
  episode_len_mean: 409.29411764705884
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.429411764705839
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 17
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.815087464120653
          entropy_coeff: 0.009999999999999998
          kl: 0.007929158434775666
          policy_loss: -0.06051536053419113
          total_loss: -0.08177518033319049
          vf_explained_var: 0.11704941838979721
          vf_loss: 0.005305221888961063
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,7,184.015,7000,-4.42941,-3.86,-9.86,409.294


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-23_19-48-15
  done: false
  episode_len_mean: 409.42105263157896
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.395263157894694
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 19
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.800937543974982
          entropy_coeff: 0.009999999999999998
          kl: 0.008380999908755252
          policy_loss: 0.018658138066530227
          total_loss: -0.004579783934685919
          vf_explained_var: 0.41173985600471497
          vf_loss: 0.0030952507921028884
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,8,203.794,8000,-4.39526,-3.86,-9.86,409.421


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-23_19-48-35
  done: false
  episode_len_mean: 408.6818181818182
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.34681818181814
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 22
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7836355288823444
          entropy_coeff: 0.009999999999999998
          kl: 0.009291714395237112
          policy_loss: 0.026655024538437527
          total_loss: 0.007051481886042489
          vf_explained_var: 0.38125932216644287
          vf_loss: 0.006374472229638033
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,9,223.854,9000,-4.34682,-3.86,-9.86,408.682


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-23_19-48-54
  done: false
  episode_len_mean: 409.6666666666667
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.334999999999957
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 24
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.785961969693502
          entropy_coeff: 0.009999999999999998
          kl: 0.011949275585753583
          policy_loss: 0.14223714901341333
          total_loss: 0.1197340795563327
          vf_explained_var: 0.8202787041664124
          vf_loss: 0.0029666936406607014
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,10,242.987,10000,-4.335,-3.86,-9.86,409.667


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-23_19-49-14
  done: false
  episode_len_mean: 411.6923076923077
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.336923076923034
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 26
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7294529941346912
          entropy_coeff: 0.009999999999999998
          kl: 0.011171176208441599
          policy_loss: -0.0838297329015202
          total_loss: -0.10368211054139667
          vf_explained_var: 0.6892042756080627
          vf_loss: 0.005207913873406748
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,11,263.331,11000,-4.33692,-3.86,-9.86,411.692


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-23_19-49-34
  done: false
  episode_len_mean: 414.7857142857143
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.352142857142814
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 28
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7021663268407186
          entropy_coeff: 0.009999999999999998
          kl: 0.010727903101116675
          policy_loss: -0.07177570462226868
          total_loss: -0.08795975413587358
          vf_explained_var: 0.48596832156181335
          vf_loss: 0.00869203625091662
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,12,282.863,12000,-4.35214,-3.86,-9.86,414.786




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-23_19-50-14
  done: false
  episode_len_mean: 416.61290322580646
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.350645161290279
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 31
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.713387293285794
          entropy_coeff: 0.009999999999999998
          kl: 0.00821484380528409
          policy_loss: -0.015414113468594021
          total_loss: -0.03565204524331623
          vf_explained_var: 0.6229920387268066
          vf_loss: 0.005252974281190998
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,13,322.489,13000,-4.35065,-3.86,-9.86,416.613


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-23_19-50-34
  done: false
  episode_len_mean: 419.6666666666667
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.369999999999955
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 33
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7082553678088717
          entropy_coeff: 0.009999999999999998
          kl: 0.010467954907779574
          policy_loss: 0.03292229092783398
          total_loss: 0.00982908486492104
          vf_explained_var: 0.7748987674713135
          vf_loss: 0.0018957547882261375
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,14,342.551,14000,-4.37,-3.86,-9.86,419.667


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-23_19-50-55
  done: false
  episode_len_mean: 420.9428571428571
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.3728571428570975
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 35
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6755916357040403
          entropy_coeff: 0.009999999999999998
          kl: 0.009193660526080037
          policy_loss: 0.04141766611072752
          total_loss: 0.019485084836681685
          vf_explained_var: 0.9250808954238892
          vf_loss: 0.0029846012365952546
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,15,364.006,15000,-4.37286,-3.86,-9.86,420.943


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-23_19-51-15
  done: false
  episode_len_mean: 423.2162162162162
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.386756756756712
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 37
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6199710687001545
          entropy_coeff: 0.009999999999999998
          kl: 0.008861371269091634
          policy_loss: -0.09029238025347391
          total_loss: -0.11030173699061076
          vf_explained_var: 0.847046434879303
          vf_loss: 0.004418079408868733
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,16,383.972,16000,-4.38676,-3.86,-9.86,423.216


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-23_19-51-36
  done: false
  episode_len_mean: 424.64102564102564
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.393076923076878
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 39
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6060307926601833
          entropy_coeff: 0.009999999999999998
          kl: 0.01159370513129631
          policy_loss: -0.08199029150936339
          total_loss: -0.1017773429552714
          vf_explained_var: 0.876018762588501
          vf_loss: 0.003954512021866524
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,17,404.336,17000,-4.39308,-3.86,-9.86,424.641


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-23_19-51-55
  done: false
  episode_len_mean: 426.5238095238095
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.401428571428525
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 42
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.562554793887668
          entropy_coeff: 0.009999999999999998
          kl: 0.008552329102289072
          policy_loss: -0.06480226375990444
          total_loss: -0.08488140867816077
          vf_explained_var: 0.9009752869606018
          vf_loss: 0.003835937565761722
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,18,424.182,18000,-4.40143,-3.86,-9.86,426.524


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-23_19-52-16
  done: false
  episode_len_mean: 427.79545454545456
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.407954545454499
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 44
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5560142676035564
          entropy_coeff: 0.009999999999999998
          kl: 0.011619525023541695
          policy_loss: 0.10558415297418833
          total_loss: 0.08566944706771108
          vf_explained_var: 0.8137038350105286
          vf_loss: 0.0033215316103046965
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,19,444.516,19000,-4.40795,-3.86,-9.86,427.795


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-23_19-52-35
  done: false
  episode_len_mean: 428.4782608695652
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.409130434782561
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 46
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5528295093112523
          entropy_coeff: 0.009999999999999998
          kl: 0.009442039382388353
          policy_loss: 0.029559416737821368
          total_loss: 0.011490584082073636
          vf_explained_var: 0.6533993482589722
          vf_loss: 0.005571053334925738
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,20,463.94,20000,-4.40913,-3.86,-9.86,428.478


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-23_19-52-53
  done: false
  episode_len_mean: 429.75
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.416666666666619
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 48
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.52110681798723
          entropy_coeff: 0.009999999999999998
          kl: 0.008294742653086088
          policy_loss: -0.16767918053600522
          total_loss: -0.18557664371199079
          vf_explained_var: 0.8260522484779358
          vf_loss: 0.0056546550502793655
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,21,482.22,21000,-4.41667,-3.86,-9.86,429.75


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-23_19-53-11
  done: false
  episode_len_mean: 431.18
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.426199999999953
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 50
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4885539531707765
          entropy_coeff: 0.009999999999999998
          kl: 0.010789211548255828
          policy_loss: -0.15868818163871765
          total_loss: -0.17491533988051944
          vf_explained_var: 0.4452061951160431
          vf_loss: 0.006500538298860192
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,22,499.59,22000,-4.4262,-3.86,-9.86,431.18


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-23_19-53-29
  done: false
  episode_len_mean: 433.8490566037736
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.446415094339574
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 53
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4855319446987574
          entropy_coeff: 0.009999999999999998
          kl: 0.011116304088179405
          policy_loss: -0.07276245761248801
          total_loss: -0.08999257741702928
          vf_explained_var: 0.7027759552001953
          vf_loss: 0.005401940670950959
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,23,517.884,23000,-4.44642,-3.86,-9.86,433.849


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-23_19-53-48
  done: false
  episode_len_mean: 434.3090909090909
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.447090909090861
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 55
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4416831970214843
          entropy_coeff: 0.009999999999999998
          kl: 0.011761002998574079
          policy_loss: 0.07479316372838285
          total_loss: 0.056353772928317385
          vf_explained_var: 0.8078038692474365
          vf_loss: 0.0036252363817766307
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,24,536.674,24000,-4.44709,-3.86,-9.86,434.309


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-23_19-54-08
  done: false
  episode_len_mean: 434.42105263157896
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.4445614035087235
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 57
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.481146952841017
          entropy_coeff: 0.009999999999999998
          kl: 0.010945687448970521
          policy_loss: -0.02685449090268877
          total_loss: -0.0422958160440127
          vf_explained_var: 0.5427663326263428
          vf_loss: 0.007181010805329101
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,25,556.277,25000,-4.44456,-3.86,-9.86,434.421


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-23_19-54-27
  done: false
  episode_len_mean: 434.20338983050846
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.438983050847409
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 59
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3590919944975113
          entropy_coeff: 0.009999999999999998
          kl: 0.008136656733831163
          policy_loss: -0.10379506465461519
          total_loss: -0.11820235401391983
          vf_explained_var: 0.6632922887802124
          vf_loss: 0.007556301454314962
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,26,575.396,26000,-4.43898,-3.86,-9.86,434.203




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-23_19-55-04
  done: false
  episode_len_mean: 433.9032258064516
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.431290322580597
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 62
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3308644851048785
          entropy_coeff: 0.009999999999999998
          kl: 0.008920217118296984
          policy_loss: 0.05101245252622499
          total_loss: 0.03725331781638993
          vf_explained_var: 0.6061708331108093
          vf_loss: 0.007765467651188374
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,27,612.799,27000,-4.43129,-3.86,-9.86,433.903


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-23_19-55-23
  done: false
  episode_len_mean: 434.28125
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.4321874999999515
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 64
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3821584966447618
          entropy_coeff: 0.009999999999999998
          kl: 0.008633569892946823
          policy_loss: 0.11343564556704627
          total_loss: 0.09655558301342858
          vf_explained_var: 0.3652201294898987
          vf_loss: 0.005214814822668106
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,28,631.335,28000,-4.43219,-3.86,-9.86,434.281


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-23_19-55-41
  done: false
  episode_len_mean: 434.7878787878788
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.434545454545406
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 66
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.308122817675273
          entropy_coeff: 0.009999999999999998
          kl: 0.011663353597190548
          policy_loss: -0.07617642978827159
          total_loss: -0.08680824248327149
          vf_explained_var: 0.3994485139846802
          vf_loss: 0.01011674489062797
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,29,649.737,29000,-4.43455,-3.86,-9.86,434.788


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-23_19-56-00
  done: false
  episode_len_mean: 434.5735294117647
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.429852941176422
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 68
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.304282988442315
          entropy_coeff: 0.009999999999999998
          kl: 0.0107477344045306
          policy_loss: -0.1283126112487581
          total_loss: -0.13840490447150336
          vf_explained_var: 0.24112474918365479
          vf_loss: 0.010800988744530412
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,30,668.988,30000,-4.42985,-3.86,-9.86,434.574


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-23_19-56-21
  done: false
  episode_len_mean: 434.49295774647885
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.425492957746431
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 71
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3144882864422267
          entropy_coeff: 0.009999999999999998
          kl: 0.0082653286962029
          policy_loss: 0.04817105664147271
          total_loss: 0.03645745457874404
          vf_explained_var: 0.5554531216621399
          vf_loss: 0.009778217032241325
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,31,689.509,31000,-4.42549,-3.86,-9.86,434.493


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-23_19-56-40
  done: false
  episode_len_mean: 434.8904109589041
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.4272602739725535
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 73
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.338136445151435
          entropy_coeff: 0.009999999999999998
          kl: 0.008405245413227114
          policy_loss: -0.05098868658145269
          total_loss: -0.06251714080572128
          vf_explained_var: 0.5227544903755188
          vf_loss: 0.010171856784856775
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,32,708.126,32000,-4.42726,-3.86,-9.86,434.89


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-23_19-56-58
  done: false
  episode_len_mean: 435.3466666666667
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.429733333333284
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 75
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.322149059507582
          entropy_coeff: 0.009999999999999998
          kl: 0.00837809552291853
          policy_loss: -0.06784811433818605
          total_loss: -0.0790189661913448
          vf_explained_var: 0.45956292748451233
          vf_loss: 0.010375020685347004
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,33,726.22,33000,-4.42973,-3.86,-9.86,435.347


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-23_19-57-17
  done: false
  episode_len_mean: 435.8181818181818
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.432467532467483
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 77
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2834779103597005
          entropy_coeff: 0.009999999999999998
          kl: 0.008530085170790845
          policy_loss: -0.13576504190762836
          total_loss: -0.14635975360870362
          vf_explained_var: 0.28741344809532166
          vf_loss: 0.010534047802341066
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,34,745.215,34000,-4.43247,-3.86,-9.86,435.818


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-23_19-57-36
  done: false
  episode_len_mean: 436.3
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.434499999999952
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 80
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2537445704142254
          entropy_coeff: 0.009999999999999998
          kl: 0.007133612985875424
          policy_loss: 0.029798848927021025
          total_loss: 0.016907573325766458
          vf_explained_var: 0.5491291284561157
          vf_loss: 0.008219449791229433
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,35,764.067,35000,-4.4345,-3.86,-9.86,436.3


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-23_19-57-56
  done: false
  episode_len_mean: 436.0609756097561
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.430365853658488
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 82
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.183645706706577
          entropy_coeff: 0.009999999999999998
          kl: 0.008987793154797172
          policy_loss: -0.04155415611134635
          total_loss: -0.05139942251973682
          vf_explained_var: 0.4208368957042694
          vf_loss: 0.01019362923818537
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,36,784.184,36000,-4.43037,-3.86,-9.86,436.061


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-23_19-58-17
  done: false
  episode_len_mean: 435.98809523809524
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.427976190476142
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 84
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.222153674231635
          entropy_coeff: 0.009999999999999998
          kl: 0.010766922967580052
          policy_loss: -0.11927219405770302
          total_loss: -0.13105402431554264
          vf_explained_var: 0.5142672061920166
          vf_loss: 0.008286324281814611
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,37,805.546,37000,-4.42798,-3.86,-9.86,435.988


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-23_19-58-36
  done: false
  episode_len_mean: 436.14942528735634
  episode_media: {}
  episode_reward_max: -3.8599999999999617
  episode_reward_mean: -4.427241379310296
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 87
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.275034189224243
          entropy_coeff: 0.009999999999999998
          kl: 0.00595967333335964
          policy_loss: -0.011781069636344909
          total_loss: -0.024211448265446557
          vf_explained_var: 0.5969095230102539
          vf_loss: 0.00912802927972128
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,38,824.656,38000,-4.42724,-3.86,-9.86,436.149


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-23_19-58-56
  done: false
  episode_len_mean: 435.8426966292135
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.400112359550514
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 89
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.289850730366177
          entropy_coeff: 0.009999999999999998
          kl: 0.012392546961491427
          policy_loss: -0.04330721406473054
          total_loss: -0.029145468026399612
          vf_explained_var: 0.1712469905614853
          vf_loss: 0.034581739041540355
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,39,844.423,39000,-4.40011,-2.29,-9.86,435.843




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-23_19-59-32
  done: false
  episode_len_mean: 436.38461538461536
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.404615384615337
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 91
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.279676389694214
          entropy_coeff: 0.009999999999999998
          kl: 0.011365624880625072
          policy_loss: -0.06780133959319856
          total_loss: -0.07792381130986743
          vf_explained_var: 0.5251362919807434
          vf_loss: 0.010401167423050436
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,40,879.885,40000,-4.40462,-2.29,-9.86,436.385


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-23_19-59-54
  done: false
  episode_len_mean: 436.31182795698925
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.4030107526881235
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 93
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2330397420459325
          entropy_coeff: 0.009999999999999998
          kl: 0.009429774579956005
          policy_loss: -0.11293440775738822
          total_loss: -0.12432260281509823
          vf_explained_var: 0.5818276405334473
          vf_loss: 0.009056247741035703
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,41,901.913,41000,-4.40301,-2.29,-9.86,436.312


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-23_20-00-24
  done: false
  episode_len_mean: 436.0
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.398645833333286
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 96
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.233578191863166
          entropy_coeff: 0.009999999999999998
          kl: 0.007511988361391237
          policy_loss: -0.005438137302796046
          total_loss: -0.019096689919630688
          vf_explained_var: 0.6500407457351685
          vf_loss: 0.007174827655156453
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,42,931.896,42000,-4.39865,-2.29,-9.86,436


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-23_20-00-51
  done: false
  episode_len_mean: 435.98979591836735
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.397755102040769
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 98
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1640738964080812
          entropy_coeff: 0.009999999999999998
          kl: 0.008139213958745846
          policy_loss: -0.01217754301097658
          total_loss: -0.025674344930383893
          vf_explained_var: 0.7681093811988831
          vf_loss: 0.006516095623373986
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,43,958.876,43000,-4.39776,-2.29,-9.86,435.99


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-23_20-01-16
  done: false
  episode_len_mean: 435.8
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.414899999999951
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 3
  episodes_total: 101
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1595092799928453
          entropy_coeff: 0.009999999999999998
          kl: 0.010073970001347899
          policy_loss: 0.02578423014945454
          total_loss: 0.034863235221968755
          vf_explained_var: 0.6495180130004883
          vf_loss: 0.02865930088640501
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,44,984.723,44000,-4.4149,-2.29,-9.86,435.8


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-23_20-01-42
  done: false
  episode_len_mean: 435.82
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.415099999999952
  episode_reward_min: -9.859999999999955
  episodes_this_iter: 2
  episodes_total: 103
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1553941620720756
          entropy_coeff: 0.009999999999999998
          kl: 0.004990846914951171
          policy_loss: 0.01546290549967024
          total_loss: 0.004057837526003519
          vf_explained_var: 0.7196251153945923
          vf_loss: 0.009150704681976802
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,45,1010.64,45000,-4.4151,-2.29,-9.86,435.82


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-23_20-02-09
  done: false
  episode_len_mean: 435.29
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.352599999999952
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 106
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.218172033627828
          entropy_coeff: 0.009999999999999998
          kl: 0.009678379322517867
          policy_loss: -0.05322426466478242
          total_loss: -0.0645353032482995
          vf_explained_var: 0.696117103099823
          vf_loss: 0.009902840425881246
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,46,1036.96,46000,-4.3526,-2.29,-6.15,435.29


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-23_20-02-33
  done: false
  episode_len_mean: 435.59
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.355599999999952
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 108
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1891521215438843
          entropy_coeff: 0.009999999999999998
          kl: 0.008879403202508001
          policy_loss: 0.05700274035334587
          total_loss: 0.04059322037630611
          vf_explained_var: 0.771797239780426
          vf_loss: 0.004594059765157807
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,47,1061.37,47000,-4.3556,-2.29,-6.15,435.59


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-23_20-02-58
  done: false
  episode_len_mean: 435.68
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.356499999999952
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 110
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.17584527598487
          entropy_coeff: 0.009999999999999998
          kl: 0.009575842308693143
          policy_loss: -0.12981241138445007
          total_loss: -0.1441402830183506
          vf_explained_var: 0.7631569504737854
          vf_loss: 0.006472995740154551
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,48,1085.95,48000,-4.3565,-2.29,-6.15,435.68


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-23_20-03-22
  done: false
  episode_len_mean: 435.67
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.356399999999952
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 113
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.139356944296095
          entropy_coeff: 0.009999999999999998
          kl: 0.014269520725157806
          policy_loss: -0.008985245476166408
          total_loss: -0.02292207951347033
          vf_explained_var: 0.8264049291610718
          vf_loss: 0.00602978366595279
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,49,1109.87,49000,-4.3564,-2.29,-6.15,435.67


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-23_20-03-44
  done: false
  episode_len_mean: 436.53
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.364999999999951
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 115
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1486356205410426
          entropy_coeff: 0.009999999999999998
          kl: 0.007830625393956462
          policy_loss: 0.11732956502172683
          total_loss: 0.10307966768741608
          vf_explained_var: 0.34841614961624146
          vf_loss: 0.006453398668155488
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,50,1131.96,50000,-4.365,-2.29,-6.15,436.53


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-23_20-04-08
  done: false
  episode_len_mean: 436.67
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.3663999999999525
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 117
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.08079170121087
          entropy_coeff: 0.009999999999999998
          kl: 0.010453668653037065
          policy_loss: -0.1222158753209644
          total_loss: -0.13340416567193136
          vf_explained_var: 0.4413968324661255
          vf_loss: 0.008574259513989092
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,51,1156.54,51000,-4.3664,-2.29,-6.15,436.67




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-23_20-04-49
  done: false
  episode_len_mean: 436.84
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.368099999999952
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 120
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.072778908411662
          entropy_coeff: 0.009999999999999998
          kl: 0.00922540269739426
          policy_loss: 0.014142849544684092
          total_loss: 0.003514394329653846
          vf_explained_var: 0.5955776572227478
          vf_loss: 0.009176795076604726
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,52,1196.97,52000,-4.3681,-2.29,-6.15,436.84


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-23_20-05-13
  done: false
  episode_len_mean: 437.23
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.371999999999951
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 122
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.137690846125285
          entropy_coeff: 0.009999999999999998
          kl: 0.010285055192131468
          policy_loss: -0.061753562009996835
          total_loss: -0.07394151596559419
          vf_explained_var: 0.543520450592041
          vf_loss: 0.008160451526701864
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,53,1221.52,53000,-4.372,-2.29,-6.15,437.23


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-23_20-05-38
  done: false
  episode_len_mean: 437.0
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.369699999999951
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 125
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1552549733055963
          entropy_coeff: 0.009999999999999998
          kl: 0.009697169464143377
          policy_loss: 0.020159436265627544
          total_loss: 0.010165009316470889
          vf_explained_var: 0.46772316098213196
          vf_loss: 0.010588403115333576
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,54,1246.07,54000,-4.3697,-2.29,-6.15,437


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-23_20-06-00
  done: false
  episode_len_mean: 437.66
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.376299999999951
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 127
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.114791488647461
          entropy_coeff: 0.009999999999999998
          kl: 0.010898284687231946
          policy_loss: 0.0941600567764706
          total_loss: 0.07779857880539365
          vf_explained_var: 0.5364037752151489
          vf_loss: 0.003696608901049735
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,55,1268.38,55000,-4.3763,-2.29,-6.15,437.66


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-23_20-06-25
  done: false
  episode_len_mean: 437.09
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.370599999999951
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 129
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.106161361270481
          entropy_coeff: 0.009999999999999998
          kl: 0.008702019156740053
          policy_loss: 0.0018217076857884726
          total_loss: -0.011060031255086263
          vf_explained_var: 0.1770768165588379
          vf_loss: 0.00730967154312465
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,56,1292.49,56000,-4.3706,-2.29,-6.15,437.09


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-23_20-06-49
  done: false
  episode_len_mean: 436.86
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.368299999999952
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 131
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9894014636675517
          entropy_coeff: 0.009999999999999998
          kl: 0.01015785820253308
          policy_loss: -0.09869575045175022
          total_loss: -0.11147152731815974
          vf_explained_var: 0.6715469360351562
          vf_loss: 0.006102451361301872
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,57,1317.14,57000,-4.3683,-2.29,-6.15,436.86


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-23_20-07-14
  done: false
  episode_len_mean: 435.13
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.350999999999953
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 134
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9731889830695257
          entropy_coeff: 0.009999999999999998
          kl: 0.009742258518797758
          policy_loss: 0.052127044730716283
          total_loss: 0.03730539754033089
          vf_explained_var: 0.5481957793235779
          vf_loss: 0.003936015563603077
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,58,1341.76,58000,-4.351,-2.29,-6.15,435.13


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-23_20-07-39
  done: false
  episode_len_mean: 434.81
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.347799999999952
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 136
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0157004568311905
          entropy_coeff: 0.009999999999999998
          kl: 0.01042776109250449
          policy_loss: -0.12529418153895272
          total_loss: -0.13667507320642472
          vf_explained_var: 0.5395908355712891
          vf_loss: 0.007733336059997479
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,59,1366.55,59000,-4.3478,-2.29,-6.15,434.81


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-23_20-08-03
  done: false
  episode_len_mean: 433.2
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.331699999999953
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 139
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9730835517247518
          entropy_coeff: 0.009999999999999998
          kl: 0.006269380225958473
          policy_loss: 0.046831805258989334
          total_loss: 0.03586910826464494
          vf_explained_var: 0.45432335138320923
          vf_loss: 0.008141200239252713
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,60,1390.98,60000,-4.3317,-2.29,-6.15,433.2


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-23_20-08-29
  done: false
  episode_len_mean: 432.29
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.3225999999999525
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 141
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9777796732054815
          entropy_coeff: 0.009999999999999998
          kl: 0.008644253088080254
          policy_loss: -0.12626225335730445
          total_loss: -0.1397722616791725
          vf_explained_var: 0.8373710513114929
          vf_loss: 0.005403364095319476
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,61,1416.69,61000,-4.3226,-2.29,-6.15,432.29


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-23_20-08-53
  done: false
  episode_len_mean: 431.36
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.313299999999953
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 144
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.088556151919895
          entropy_coeff: 0.009999999999999998
          kl: 0.01218184892330119
          policy_loss: -0.04659910549720128
          total_loss: -0.051874770555231306
          vf_explained_var: -0.08240556716918945
          vf_loss: 0.014391709728321682
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,62,1441.19,62000,-4.3133,-2.29,-6.15,431.36


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-23_20-09-17
  done: false
  episode_len_mean: 431.49
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.314599999999953
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 146
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9973971684773764
          entropy_coeff: 0.009999999999999998
          kl: 0.008682389822048843
          policy_loss: 0.07460441295471457
          total_loss: 0.05877807471487257
          vf_explained_var: 0.7874733805656433
          vf_loss: 0.003279393924943482
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,63,1464.98,63000,-4.3146,-2.29,-6.15,431.49


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-23_20-09-42
  done: false
  episode_len_mean: 430.96
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.309299999999953
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 148
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.051754613717397
          entropy_coeff: 0.009999999999999998
          kl: 0.013698685625042926
          policy_loss: -0.06835680305957795
          total_loss: -0.08103034049272537
          vf_explained_var: 0.5799527168273926
          vf_loss: 0.0064741406654421655
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,64,1489.42,64000,-4.3093,-2.29,-6.15,430.96




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-23_20-10-27
  done: false
  episode_len_mean: 428.57
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.285399999999954
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 151
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.948362746503618
          entropy_coeff: 0.009999999999999998
          kl: 0.007245887915321999
          policy_loss: 0.020531770338614783
          total_loss: 0.005188081165154775
          vf_explained_var: 0.7554931044578552
          vf_loss: 0.0034153508591569133
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,65,1535.18,65000,-4.2854,-2.29,-6.15,428.57


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-23_20-10-54
  done: false
  episode_len_mean: 426.97
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.269399999999955
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 153
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.049194338586595
          entropy_coeff: 0.009999999999999998
          kl: 0.01267501610823951
          policy_loss: -0.10163272354337904
          total_loss: -0.10950944390561845
          vf_explained_var: 0.12567538022994995
          vf_loss: 0.011347725548936675
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,66,1561.94,66000,-4.2694,-2.29,-6.15,426.97


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-23_20-11-22
  done: false
  episode_len_mean: 426.15
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.2611999999999535
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 156
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9278341624471875
          entropy_coeff: 0.009999999999999998
          kl: 0.010113807790558009
          policy_loss: -0.014488451679547628
          total_loss: -0.022353045807944404
          vf_explained_var: 0.5158191323280334
          vf_loss: 0.010402361965841718
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 670

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,67,1590.08,67000,-4.2612,-2.29,-6.15,426.15


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-23_20-11-51
  done: false
  episode_len_mean: 425.42
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.253899999999954
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 158
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7641051994429695
          entropy_coeff: 0.009999999999999998
          kl: 0.009075877399056489
          policy_loss: -0.08164861649274827
          total_loss: -0.09408221542835235
          vf_explained_var: 0.8700914978981018
          vf_loss: 0.004299866171075134
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,68,1618.96,68000,-4.2539,-2.29,-6.15,425.42


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-23_20-12-19
  done: false
  episode_len_mean: 425.73
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.2569999999999535
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 160
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0851427965694005
          entropy_coeff: 0.009999999999999998
          kl: 0.009275961638974619
          policy_loss: -0.09826806022061242
          total_loss: -0.10885693149434196
          vf_explained_var: 0.44432204961776733
          vf_loss: 0.009334958953938136
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 6900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,69,1647.1,69000,-4.257,-2.29,-6.15,425.73


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-23_20-12-45
  done: false
  episode_len_mean: 424.84
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.248099999999954
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 163
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.008443432384067
          entropy_coeff: 0.009999999999999998
          kl: 0.012428222154738716
          policy_loss: -0.031208917415804335
          total_loss: -0.043738798300425215
          vf_explained_var: 0.8006059527397156
          vf_loss: 0.006311731445344372
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,70,1672.21,70000,-4.2481,-2.29,-6.15,424.84


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-23_20-13-11
  done: false
  episode_len_mean: 424.33
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.242999999999954
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 165
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.860249518023597
          entropy_coeff: 0.009999999999999998
          kl: 0.009752995474158215
          policy_loss: -0.13587028247614702
          total_loss: -0.15018923303319348
          vf_explained_var: 0.8783450722694397
          vf_loss: 0.003308243526973658
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,71,1698.21,71000,-4.243,-2.29,-6.15,424.33


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-23_20-13-37
  done: false
  episode_len_mean: 423.45
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.234199999999955
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 168
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.980401372909546
          entropy_coeff: 0.009999999999999998
          kl: 0.01226245025439287
          policy_loss: -0.01804441875881619
          total_loss: -0.0285046539372868
          vf_explained_var: 0.5289773941040039
          vf_loss: 0.008117532603339188
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,72,1724.56,72000,-4.2342,-2.29,-6.15,423.45


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-23_20-14-05
  done: false
  episode_len_mean: 422.06
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.2202999999999555
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 2
  episodes_total: 170
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8833481152852376
          entropy_coeff: 0.009999999999999998
          kl: 0.010864808690368832
          policy_loss: -0.10135316091279188
          total_loss: -0.11567855262094073
          vf_explained_var: 0.8659747838973999
          vf_loss: 0.0034216089795033136
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 7300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,73,1752.71,73000,-4.2203,-2.29,-6.15,422.06


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-23_20-14-33
  done: false
  episode_len_mean: 420.01
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.1997999999999545
  episode_reward_min: -6.149999999999935
  episodes_this_iter: 3
  episodes_total: 173
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9188411328527661
          entropy_coeff: 0.009999999999999998
          kl: 0.012618127034296675
          policy_loss: -0.0895100115901894
          total_loss: -0.09756068260305457
          vf_explained_var: 0.6629371047019958
          vf_loss: 0.00987592625638677
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,74,1780.85,74000,-4.1998,-2.29,-6.15,420.01


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-23_20-15-01
  done: false
  episode_len_mean: 418.02
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.248699999999954
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 176
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9646844956609937
          entropy_coeff: 0.009999999999999998
          kl: 0.012176157260704517
          policy_loss: -0.007155405150519477
          total_loss: 0.05461708969540066
          vf_explained_var: 0.35940635204315186
          vf_loss: 0.08020171970387714
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,75,1808.3,75000,-4.2487,-2.29,-10.8,418.02


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-23_20-15-28
  done: false
  episode_len_mean: 416.7
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.245099999999955
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 2
  episodes_total: 178
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0226288795471192
          entropy_coeff: 0.009999999999999998
          kl: 0.013476103359133499
          policy_loss: -0.0908059181438552
          total_loss: -0.09596127855281035
          vf_explained_var: 0.6901037693023682
          vf_loss: 0.013723314698371623
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,76,1835.39,76000,-4.2451,-2.29,-10.8,416.7




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-23_20-16-10
  done: false
  episode_len_mean: 414.64
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.243799999999956
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 181
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.107645716932085
          entropy_coeff: 0.009999999999999998
          kl: 0.01503162713928307
          policy_loss: -0.0060857413543595204
          total_loss: 0.028540202561351986
          vf_explained_var: 0.6484020948410034
          vf_loss: 0.05419923489292463
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,77,1877.8,77000,-4.2438,-2.29,-10.8,414.64


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-23_20-16-37
  done: false
  episode_len_mean: 413.36
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.2309999999999555
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 184
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.082269628842672
          entropy_coeff: 0.009999999999999998
          kl: 0.015308540536104311
          policy_loss: -0.05773127741283841
          total_loss: -0.05231473983989821
          vf_explained_var: 0.473812073469162
          vf_loss: 0.024708381936781935
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,78,1904.7,78000,-4.231,-2.29,-10.8,413.36


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-23_20-17-03
  done: false
  episode_len_mean: 412.14
  episode_media: {}
  episode_reward_max: -2.2899999999999965
  episode_reward_mean: -4.2187999999999555
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 2
  episodes_total: 186
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1205252488454183
          entropy_coeff: 0.009999999999999998
          kl: 0.012746691520429445
          policy_loss: -0.12080534994602203
          total_loss: -0.13127856486373476
          vf_explained_var: 0.7498727440834045
          vf_loss: 0.009457361687802606
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,79,1930.09,79000,-4.2188,-2.29,-10.8,412.14


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-23_20-17-28
  done: false
  episode_len_mean: 411.48
  episode_media: {}
  episode_reward_max: -3.4399999999999706
  episode_reward_mean: -4.232299999999956
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 189
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.056717982557085
          entropy_coeff: 0.009999999999999998
          kl: 0.011866953175714885
          policy_loss: 0.017162138554784986
          total_loss: 0.007729502684540219
          vf_explained_var: 0.4333215355873108
          vf_loss: 0.009947845745935208
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,80,1955.37,80000,-4.2323,-3.44,-10.8,411.48


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-23_20-17-54
  done: false
  episode_len_mean: 409.78
  episode_media: {}
  episode_reward_max: -3.4399999999999706
  episode_reward_mean: -4.2152999999999565
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 2
  episodes_total: 191
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9330639084180197
          entropy_coeff: 0.009999999999999998
          kl: 0.008581248702140234
          policy_loss: -0.10621350291702482
          total_loss: -0.11619161069393158
          vf_explained_var: 0.6715477705001831
          vf_loss: 0.008494405566145563
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,81,1981.05,81000,-4.2153,-3.44,-10.8,409.78


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-23_20-18-19
  done: false
  episode_len_mean: 408.23
  episode_media: {}
  episode_reward_max: -3.4399999999999706
  episode_reward_mean: -4.199799999999956
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 194
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8152099609375
          entropy_coeff: 0.009999999999999998
          kl: 0.008347796412510681
          policy_loss: 0.07708863673938646
          total_loss: 0.06732451137569216
          vf_explained_var: 0.5629695653915405
          vf_loss: 0.007553193099900253
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,82,2006.11,82000,-4.1998,-3.44,-10.8,408.23


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-23_20-18-47
  done: false
  episode_len_mean: 407.68
  episode_media: {}
  episode_reward_max: -3.4399999999999706
  episode_reward_mean: -4.194299999999957
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 2
  episodes_total: 196
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7945300300916036
          entropy_coeff: 0.009999999999999998
          kl: 0.011680508472430597
          policy_loss: -0.1279475533299976
          total_loss: -0.13547181694044008
          vf_explained_var: 0.47419288754463196
          vf_loss: 0.009252985667747756
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,83,2034.48,83000,-4.1943,-3.44,-10.8,407.68


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-23_20-19-14
  done: false
  episode_len_mean: 405.76
  episode_media: {}
  episode_reward_max: -3.4399999999999706
  episode_reward_mean: -4.175099999999957
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 199
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6659393588701883
          entropy_coeff: 0.009999999999999998
          kl: 0.0074699603837724265
          policy_loss: -0.10012543317344454
          total_loss: -0.10498916506767272
          vf_explained_var: 0.4111643433570862
          vf_loss: 0.01104866291085879
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,84,2061.57,84000,-4.1751,-3.44,-10.8,405.76


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-23_20-19-43
  done: false
  episode_len_mean: 403.8
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.135699999999957
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 202
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6172287146250406
          entropy_coeff: 0.009999999999999998
          kl: 0.0052688235348109435
          policy_loss: -0.07094322856929568
          total_loss: -0.07569107537468274
          vf_explained_var: 0.32514384388923645
          vf_loss: 0.010897559848510556
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,85,2089.81,85000,-4.1357,-3.35,-10.8,403.8


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-23_20-20-09
  done: false
  episode_len_mean: 402.86
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.126299999999957
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 205
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.657235242260827
          entropy_coeff: 0.009999999999999998
          kl: 0.005424683364210223
          policy_loss: 0.03086310132510132
          total_loss: 0.024021255928609105
          vf_explained_var: 0.5021877884864807
          vf_loss: 0.009188038097151244
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,86,2116.58,86000,-4.1263,-3.35,-10.8,402.86


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-23_20-20-37
  done: false
  episode_len_mean: 400.97
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.1073999999999575
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 208
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7541700548595853
          entropy_coeff: 0.009999999999999998
          kl: 0.007607002050742562
          policy_loss: 0.054009326299031574
          total_loss: 0.04664392082227601
          vf_explained_var: 0.400435209274292
          vf_loss: 0.009415592020377516
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,87,2144.15,87000,-4.1074,-3.35,-10.8,400.97




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-23_20-21-21
  done: false
  episode_len_mean: 398.57
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -4.083399999999958
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 211
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5784203860494825
          entropy_coeff: 0.009999999999999998
          kl: 0.011551709079408133
          policy_loss: 0.058495956824885474
          total_loss: 0.05326485070917342
          vf_explained_var: 0.4204216003417969
          vf_loss: 0.00939792726551079
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,88,2188.55,88000,-4.0834,-3.02,-10.8,398.57


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-23_20-21-50
  done: false
  episode_len_mean: 395.68
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -4.054499999999958
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 214
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5537443929248387
          entropy_coeff: 0.009999999999999998
          kl: 0.008563164172598666
          policy_loss: 0.03573604838715659
          total_loss: 0.03046239283349779
          vf_explained_var: 0.5202913284301758
          vf_loss: 0.009407472891163908
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,89,2217.65,89000,-4.0545,-3.02,-10.8,395.68


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-23_20-22-20
  done: false
  episode_len_mean: 393.1
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -4.028699999999959
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 217
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.654174440436893
          entropy_coeff: 0.009999999999999998
          kl: 0.007829043435568221
          policy_loss: 0.0350532054901123
          total_loss: 0.028885805937978955
          vf_explained_var: 0.4424043297767639
          vf_loss: 0.009591441344107604
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,90,2247.06,90000,-4.0287,-3.02,-10.8,393.1


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-23_20-22-48
  done: false
  episode_len_mean: 390.77
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -4.00539999999996
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 220
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6166292760107253
          entropy_coeff: 0.009999999999999998
          kl: 0.004768213438634916
          policy_loss: 0.060320100602176456
          total_loss: 0.051702542851368584
          vf_explained_var: 0.3882598876953125
          vf_loss: 0.007071916169176499
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,91,2275.56,91000,-4.0054,-3.02,-10.8,390.77


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-23_20-23-17
  done: false
  episode_len_mean: 388.25
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.98019999999996
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 223
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.702251328362359
          entropy_coeff: 0.009999999999999998
          kl: 0.010805581088072537
          policy_loss: 0.016165454023414186
          total_loss: 0.010589511030250125
          vf_explained_var: 0.3696785271167755
          vf_loss: 0.010906290519698006
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,92,2303.91,92000,-3.9802,-3.02,-10.8,388.25


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-23_20-23-43
  done: false
  episode_len_mean: 387.29
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.9705999999999606
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 2
  episodes_total: 225
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7564063787460327
          entropy_coeff: 0.009999999999999998
          kl: 0.01682790576995791
          policy_loss: -0.09833930648035473
          total_loss: -0.10409256137079663
          vf_explained_var: 0.5258105397224426
          vf_loss: 0.01096941492998869
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,93,2330.12,93000,-3.9706,-3.02,-10.8,387.29


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-23_20-24-10
  done: false
  episode_len_mean: 385.07
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.9483999999999617
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 228
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8053063882721796
          entropy_coeff: 0.009999999999999998
          kl: 0.009629658687166377
          policy_loss: 0.041120141165124045
          total_loss: 0.034226451565821964
          vf_explained_var: 0.5153694152832031
          vf_loss: 0.010677893887946589
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,94,2357.07,94000,-3.9484,-3.02,-10.8,385.07


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-23_20-24-38
  done: false
  episode_len_mean: 383.38
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.9314999999999625
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 231
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6193169554074605
          entropy_coeff: 0.009999999999999998
          kl: 0.011282217265880412
          policy_loss: 0.01797447486056222
          total_loss: 0.01294412695699268
          vf_explained_var: 0.5573305487632751
          vf_loss: 0.010598713006280984
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,95,2384.58,95000,-3.9315,-3.02,-10.8,383.38


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-23_20-25-07
  done: false
  episode_len_mean: 381.29
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.910599999999962
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 234
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.562432630856832
          entropy_coeff: 0.009999999999999998
          kl: 0.006882268474120382
          policy_loss: 0.01923524853256014
          total_loss: 0.01350867756538921
          vf_explained_var: 0.45818057656288147
          vf_loss: 0.009553640382364392
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,96,2413.71,96000,-3.9106,-3.02,-10.8,381.29


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-23_20-25-36
  done: false
  episode_len_mean: 378.86
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.8862999999999635
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 237
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.553562421268887
          entropy_coeff: 0.009999999999999998
          kl: 0.0060620118616449715
          policy_loss: 0.007332553135024177
          total_loss: 0.0021660505069626703
          vf_explained_var: 0.5605535507202148
          vf_loss: 0.010066022436755398
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 9700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,97,2443.29,97000,-3.8863,-3.02,-10.8,378.86




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-23_20-26-21
  done: false
  episode_len_mean: 376.56
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.863299999999963
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 240
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5483521382013956
          entropy_coeff: 0.009999999999999998
          kl: 0.0054754022612807695
          policy_loss: 0.028146551715003118
          total_loss: 0.022537926336129506
          vf_explained_var: 0.6109572052955627
          vf_loss: 0.009601126271041318
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,98,2488.12,98000,-3.8633,-3.02,-10.8,376.56


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-23_20-26-54
  done: false
  episode_len_mean: 373.83
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.8359999999999634
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 243
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5532474173439874
          entropy_coeff: 0.009999999999999998
          kl: 0.006060584821576103
          policy_loss: 0.018152669237719642
          total_loss: 0.012276412381066217
          vf_explained_var: 0.6182360649108887
          vf_loss: 0.009353191036886225
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,99,2521,99000,-3.836,-3.02,-10.8,373.83


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-23_20-27-23
  done: false
  episode_len_mean: 370.08
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.798499999999964
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 246
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4365587989489237
          entropy_coeff: 0.009999999999999998
          kl: 0.005685567311237784
          policy_loss: 0.05628261135684119
          total_loss: 0.05243414524528715
          vf_explained_var: 0.5703794956207275
          vf_loss: 0.010232841015224241
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 1000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,100,2549.73,100000,-3.7985,-3.02,-10.8,370.08


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-23_20-27-52
  done: false
  episode_len_mean: 367.25
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.7701999999999645
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 249
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3627658949957953
          entropy_coeff: 0.009999999999999998
          kl: 0.008386284736710476
          policy_loss: 0.05407146679030524
          total_loss: 0.0511649328801367
          vf_explained_var: 0.5876361131668091
          vf_loss: 0.010301813560848435
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 1010

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,101,2579.35,101000,-3.7702,-3.02,-10.8,367.25


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-23_20-28-25
  done: false
  episode_len_mean: 364.88
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.746499999999965
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 252
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1744129604763456
          entropy_coeff: 0.009999999999999998
          kl: 0.005884557115698325
          policy_loss: -0.007331374784310659
          total_loss: -0.008494741552405887
          vf_explained_var: 0.5232735872268677
          vf_loss: 0.010286534846656852
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,102,2612.21,102000,-3.7465,-3.02,-10.8,364.88


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-23_20-28-59
  done: false
  episode_len_mean: 359.99
  episode_media: {}
  episode_reward_max: -2.8399999999999834
  episode_reward_mean: -3.697599999999966
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 4
  episodes_total: 256
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3120519505606758
          entropy_coeff: 0.009999999999999998
          kl: 0.00574711409333296
          policy_loss: -0.019611439812514515
          total_loss: -0.0200772139761183
          vf_explained_var: 0.5569987893104553
          vf_loss: 0.012367389257997274
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,103,2645.38,103000,-3.6976,-2.84,-10.8,359.99


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-23_20-29-30
  done: false
  episode_len_mean: 356.79
  episode_media: {}
  episode_reward_max: -2.8399999999999834
  episode_reward_mean: -3.6655999999999667
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 259
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2057986444897122
          entropy_coeff: 0.009999999999999998
          kl: 0.0039553284808955804
          policy_loss: 0.044641373389297065
          total_loss: 0.04168722530206045
          vf_explained_var: 0.6633824706077576
          vf_loss: 0.008906074930241124
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,104,2677.27,104000,-3.6656,-2.84,-10.8,356.79


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-23_20-30-04
  done: false
  episode_len_mean: 352.94
  episode_media: {}
  episode_reward_max: -2.8399999999999834
  episode_reward_mean: -3.627099999999968
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 262
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.2499314387639364
          entropy_coeff: 0.009999999999999998
          kl: 0.006337801373807789
          policy_loss: -0.1042589063445727
          total_loss: -0.10552658918831083
          vf_explained_var: 0.6161864399909973
          vf_loss: 0.011073185762183534
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,105,2710.58,105000,-3.6271,-2.84,-10.8,352.94


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-23_20-30-37
  done: false
  episode_len_mean: 348.13
  episode_media: {}
  episode_reward_max: -2.8399999999999834
  episode_reward_mean: -3.5789999999999695
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 4
  episodes_total: 266
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.3009725279278226
          entropy_coeff: 0.009999999999999998
          kl: 0.005861298460170556
          policy_loss: 3.6459995640648734e-05
          total_loss: -0.002341476579507192
          vf_explained_var: 0.5917928218841553
          vf_loss: 0.01048525520082977
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,106,2743.52,106000,-3.579,-2.84,-10.8,348.13


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-23_20-31-09
  done: false
  episode_len_mean: 345.82
  episode_media: {}
  episode_reward_max: -2.8399999999999834
  episode_reward_mean: -3.5558999999999696
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 3
  episodes_total: 269
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.3358632816208733
          entropy_coeff: 0.009999999999999998
          kl: 0.00621878413025401
          policy_loss: 0.07145275506708357
          total_loss: 0.06613705307245255
          vf_explained_var: 0.688347339630127
          vf_loss: 0.007887462675312741
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 1070

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,107,2775.69,107000,-3.5559,-2.84,-10.8,345.82




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-23_20-32-02
  done: false
  episode_len_mean: 342.38
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.5214999999999703
  episode_reward_min: -10.79999999999993
  episodes_this_iter: 4
  episodes_total: 273
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.2133645216623943
          entropy_coeff: 0.009999999999999998
          kl: 0.005265569971383321
          policy_loss: -0.02606847956776619
          total_loss: -0.02674600879351298
          vf_explained_var: 0.6032352447509766
          vf_loss: 0.011324472839219703
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,108,2828.66,108000,-3.5215,-2.58,-10.8,342.38


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-23_20-32-34
  done: false
  episode_len_mean: 339.74
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.426299999999971
  episode_reward_min: -5.849999999999944
  episodes_this_iter: 3
  episodes_total: 276
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.1952572968271045
          entropy_coeff: 0.009999999999999998
          kl: 0.006214833327015551
          policy_loss: 0.04670333787798882
          total_loss: 0.04312052751580874
          vf_explained_var: 0.6927326917648315
          vf_loss: 0.008214390041151395
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 1090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,109,2860.3,109000,-3.4263,-2.58,-5.85,339.74


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-23_20-33-08
  done: false
  episode_len_mean: 337.01
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.3893999999999718
  episode_reward_min: -5.849999999999944
  episodes_this_iter: 3
  episodes_total: 279
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.0069921685589684
          entropy_coeff: 0.009999999999999998
          kl: 0.004645967158639606
          policy_loss: -0.09345941932664978
          total_loss: -0.0920934672984812
          vf_explained_var: 0.5490016341209412
          vf_loss: 0.011319724428984854
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,110,2894.58,110000,-3.3894,-2.58,-5.85,337.01


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-23_20-33-39
  done: false
  episode_len_mean: 332.93
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.3292999999999724
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 4
  episodes_total: 283
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.1231139918168387
          entropy_coeff: 0.009999999999999998
          kl: 0.005088906568597472
          policy_loss: 0.031831791748603185
          total_loss: 0.03184889960620138
          vf_explained_var: 0.5676871538162231
          vf_loss: 0.01118463355426987
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,111,2926.11,111000,-3.3293,-2.58,-4.34,332.93


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-23_20-34-14
  done: false
  episode_len_mean: 329.89
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.298899999999973
  episode_reward_min: -4.339999999999952
  episodes_this_iter: 3
  episodes_total: 286
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.0506624076101514
          entropy_coeff: 0.009999999999999998
          kl: 0.006033045068492271
          policy_loss: -0.11614346330364546
          total_loss: -0.11568058166238997
          vf_explained_var: 0.6099490523338318
          vf_loss: 0.010894091768811147
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,112,2960.97,112000,-3.2989,-2.58,-4.34,329.89


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-23_20-34-47
  done: false
  episode_len_mean: 325.5
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.2549999999999737
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 4
  episodes_total: 290
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.185866932074229
          entropy_coeff: 0.009999999999999998
          kl: 0.009425950458479883
          policy_loss: 0.06039156234926647
          total_loss: 0.05623731497261259
          vf_explained_var: 0.7189188599586487
          vf_loss: 0.0075865944226582846
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 1130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,113,2993.2,113000,-3.255,-2.58,-4.11,325.5


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-23_20-35-21
  done: false
  episode_len_mean: 321.28
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.2127999999999752
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 4
  episodes_total: 294
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.9002747866842482
          entropy_coeff: 0.009999999999999998
          kl: 0.005560922450178947
          policy_loss: -0.01340582449403074
          total_loss: -0.011231425611509218
          vf_explained_var: 0.521113395690918
          vf_loss: 0.011107637164079481
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,114,3027.9,114000,-3.2128,-2.58,-4.08,321.28


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-23_20-35-57
  done: false
  episode_len_mean: 318.2
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.181999999999976
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 3
  episodes_total: 297
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.9982279631826613
          entropy_coeff: 0.009999999999999998
          kl: 0.0045426295866692395
          policy_loss: 0.04446436141928037
          total_loss: 0.04255380092395677
          vf_explained_var: 0.5489307641983032
          vf_loss: 0.008014935203310517
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 1150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,115,3063.26,115000,-3.182,-2.58,-4.08,318.2




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-23_20-36-48
  done: false
  episode_len_mean: 314.93
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.1492999999999767
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 4
  episodes_total: 301
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.9547284073299832
          entropy_coeff: 0.009999999999999998
          kl: 0.0046782191964315385
          policy_loss: 0.04509885377354092
          total_loss: 0.0471082699795564
          vf_explained_var: 0.44764506816864014
          vf_loss: 0.011527459126793677
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,116,3114.75,116000,-3.1493,-2.43,-4.08,314.93


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-23_20-37-25
  done: false
  episode_len_mean: 312.67
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.126699999999977
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 3
  episodes_total: 304
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.8998950137032403
          entropy_coeff: 0.009999999999999998
          kl: 0.007846293509318869
          policy_loss: -0.10812360809908973
          total_loss: -0.10586231475075086
          vf_explained_var: 0.4822577238082886
          vf_loss: 0.011235723468578524
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,117,3151.37,117000,-3.1267,-2.43,-4.08,312.67


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-23_20-37-59
  done: false
  episode_len_mean: 309.54
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.095399999999978
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 4
  episodes_total: 308
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.0087517466810014
          entropy_coeff: 0.009999999999999998
          kl: 0.004884231929074822
          policy_loss: 0.031157403811812402
          total_loss: 0.03135678031378322
          vf_explained_var: 0.49735718965530396
          vf_loss: 0.010271629510033463
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,118,3185.64,118000,-3.0954,-2.43,-4.08,309.54


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-23_20-38-33
  done: false
  episode_len_mean: 307.5
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.074999999999977
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 4
  episodes_total: 312
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.069027066230774
          entropy_coeff: 0.009999999999999998
          kl: 0.007856589972188823
          policy_loss: -0.01825839223133193
          total_loss: -0.019202705638276207
          vf_explained_var: 0.6112270951271057
          vf_loss: 0.00973367929044697
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,119,3219.75,119000,-3.075,-2.43,-4.08,307.5


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-23_20-39-08
  done: false
  episode_len_mean: 305.73
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.057299999999978
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 3
  episodes_total: 315
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.141917007499271
          entropy_coeff: 0.009999999999999998
          kl: 0.015377240431826245
          policy_loss: 0.031553730741143225
          total_loss: 0.027212206605407928
          vf_explained_var: 0.6696286797523499
          vf_loss: 0.007053617452685203
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,120,3254.21,120000,-3.0573,-2.43,-4.08,305.73


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-23_20-39-41
  done: false
  episode_len_mean: 303.84
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.0383999999999793
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 4
  episodes_total: 319
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.3498962243398032
          entropy_coeff: 0.009999999999999998
          kl: 0.008280241256824658
          policy_loss: -0.02803583898478084
          total_loss: -0.032556495981083976
          vf_explained_var: 0.6818796992301941
          vf_loss: 0.008965365930149953
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,121,3286.97,121000,-3.0384,-2.43,-4.08,303.84


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-23_20-40-11
  done: false
  episode_len_mean: 302.25
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.0224999999999795
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 3
  episodes_total: 322
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.2891516023212009
          entropy_coeff: 0.009999999999999998
          kl: 0.008327042753141356
          policy_loss: 0.04810272802909215
          total_loss: 0.04189171443382899
          vf_explained_var: 0.7841580510139465
          vf_loss: 0.006667490440627767
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,122,3317.39,122000,-3.0225,-2.43,-4.08,302.25


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-23_20-40-44
  done: false
  episode_len_mean: 300.6
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.0059999999999802
  episode_reward_min: -4.079999999999957
  episodes_this_iter: 3
  episodes_total: 325
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.181307397948371
          entropy_coeff: 0.009999999999999998
          kl: 0.008263692792202527
          policy_loss: -0.06112308435969883
          total_loss: -0.06336073784364595
          vf_explained_var: 0.6716057658195496
          vf_loss: 0.00956250752011935
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,123,3350.89,123000,-3.006,-2.43,-4.08,300.6


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-23_20-41-16
  done: false
  episode_len_mean: 297.87
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.978699999999981
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 4
  episodes_total: 329
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.2957955294185215
          entropy_coeff: 0.009999999999999998
          kl: 0.006066862298625213
          policy_loss: -0.005807141049040688
          total_loss: -0.008054251389371023
          vf_explained_var: 0.6710819602012634
          vf_loss: 0.01070136736250586
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,124,3382.87,124000,-2.9787,-2.43,-3.89,297.87




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-23_20-42-08
  done: false
  episode_len_mean: 295.56
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.955599999999981
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 332
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.1925435364246368
          entropy_coeff: 0.009999999999999998
          kl: 0.009759743030496024
          policy_loss: 0.04717342373397615
          total_loss: 0.04221992467840512
          vf_explained_var: 0.7084634304046631
          vf_loss: 0.006956690198017491
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,125,3433.88,125000,-2.9556,-2.43,-3.49,295.56


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-23_20-42-42
  done: false
  episode_len_mean: 293.39
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.9338999999999813
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 4
  episodes_total: 336
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.0697135792838202
          entropy_coeff: 0.009999999999999998
          kl: 0.005242457397676425
          policy_loss: -0.0028939323292838204
          total_loss: -0.0032021626830101013
          vf_explained_var: 0.6675970554351807
          vf_loss: 0.01038071344503098
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,126,3468.04,126000,-2.9339,-2.43,-3.49,293.39


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-23_20-43-15
  done: false
  episode_len_mean: 291.85
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.918499999999982
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 339
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.0591649545563593
          entropy_coeff: 0.009999999999999998
          kl: 0.008275681099250755
          policy_loss: 0.029317817091941832
          total_loss: 0.026270405451456705
          vf_explained_var: 0.6027352809906006
          vf_loss: 0.007531307482471068
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,127,3501.52,127000,-2.9185,-2.43,-3.49,291.85


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-23_20-43-51
  done: false
  episode_len_mean: 290.14
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.9013999999999824
  episode_reward_min: -3.429999999999971
  episodes_this_iter: 4
  episodes_total: 343
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.104745548301273
          entropy_coeff: 0.009999999999999998
          kl: 0.005706491799716397
          policy_loss: -0.01546491127875116
          total_loss: -0.01520564125643836
          vf_explained_var: 0.5047358870506287
          vf_loss: 0.011297805472794506
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,128,3536.86,128000,-2.9014,-2.43,-3.43,290.14


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-23_20-44-24
  done: false
  episode_len_mean: 289.24
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.892399999999982
  episode_reward_min: -3.429999999999971
  episodes_this_iter: 3
  episodes_total: 346
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.9944541626506381
          entropy_coeff: 0.009999999999999998
          kl: 0.004239309299430932
          policy_loss: 0.044499366482098894
          total_loss: 0.042789509064621395
          vf_explained_var: 0.5701876282691956
          vf_loss: 0.00822806067751824
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,129,3570.7,129000,-2.8924,-2.43,-3.43,289.24


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-23_20-44-58
  done: false
  episode_len_mean: 287.49
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.874899999999983
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 350
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812499999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.0474731279744043
          entropy_coeff: 0.009999999999999998
          kl: 0.0054970972704890325
          policy_loss: -0.008987388221753968
          total_loss: -0.008894858840439055
          vf_explained_var: 0.4955233633518219
          vf_loss: 0.010562964890980058
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,130,3604.72,130000,-2.8749,-2.43,-3.33,287.49


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-23_20-45-32
  done: false
  episode_len_mean: 287.03
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.8702999999999834
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 353
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812499999999998
          cur_lr: 5.000000000000001e-05
          entropy: 0.9779867404037051
          entropy_coeff: 0.009999999999999998
          kl: 0.004113490452341513
          policy_loss: -0.028196162233750027
          total_loss: -0.028558612118164697
          vf_explained_var: 0.48503243923187256
          vf_loss: 0.009414205410414272
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,131,3638.53,131000,-2.8703,-2.43,-3.33,287.03


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-23_20-46-04
  done: false
  episode_len_mean: 286.75
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.867499999999983
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 357
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906249999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.0709841185145907
          entropy_coeff: 0.009999999999999998
          kl: 0.00757297766144889
          policy_loss: -0.0020407485879129833
          total_loss: -0.001028180287943946
          vf_explained_var: 0.46572473645210266
          vf_loss: 0.01171945347968075
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,132,3670.07,132000,-2.8675,-2.43,-3.33,286.75




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-23_20-46-55
  done: false
  episode_len_mean: 286.39
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.863899999999983
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 360
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906249999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.05011418528027
          entropy_coeff: 0.009999999999999998
          kl: 0.005574738627327531
          policy_loss: -0.09128139913082123
          total_loss: -0.09005866870284081
          vf_explained_var: 0.4533504545688629
          vf_loss: 0.0117216973255078
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 13300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,133,3721.54,133000,-2.8639,-2.43,-3.33,286.39


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-23_20-47-29
  done: false
  episode_len_mean: 285.79
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.857899999999983
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 364
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906249999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1006082309616936
          entropy_coeff: 0.009999999999999998
          kl: 0.004995693863303174
          policy_loss: 0.033707687507073085
          total_loss: 0.03299728367063734
          vf_explained_var: 0.5386841893196106
          vf_loss: 0.010293728744404183
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,134,3755.42,134000,-2.8579,-2.43,-3.33,285.79


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-23_20-48-04
  done: false
  episode_len_mean: 285.27
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.852699999999983
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 367
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531249999999996
          cur_lr: 5.000000000000001e-05
          entropy: 0.8750174562136332
          entropy_coeff: 0.009999999999999998
          kl: 0.007067322249096522
          policy_loss: -0.08954561998446782
          total_loss: -0.08748951016200913
          vf_explained_var: 0.48682597279548645
          vf_loss: 0.010804904086722269
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,135,3790.39,135000,-2.8527,-2.43,-3.33,285.27


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-23_20-48-37
  done: false
  episode_len_mean: 284.78
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.8477999999999835
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 371
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531249999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.006188366148207
          entropy_coeff: 0.009999999999999998
          kl: 0.003956554941638125
          policy_loss: 0.03021787264280849
          total_loss: 0.03068714141845703
          vf_explained_var: 0.543079137802124
          vf_loss: 0.010530381380683846
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,136,3823.06,136000,-2.8478,-2.43,-3.33,284.78


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-23_20-49-13
  done: false
  episode_len_mean: 283.88
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.8387999999999836
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 375
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765624999999998e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8775059256288741
          entropy_coeff: 0.009999999999999998
          kl: 0.0029854191530691624
          policy_loss: 0.005993819567892286
          total_loss: 0.008168569869465299
          vf_explained_var: 0.49452459812164307
          vf_loss: 0.01094952066325479
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,137,3859.2,137000,-2.8388,-2.43,-3.33,283.88


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-23_20-49-48
  done: false
  episode_len_mean: 283.35
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.833499999999983
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 378
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.882812499999999e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9951496773295933
          entropy_coeff: 0.009999999999999998
          kl: 0.004427126754461222
          policy_loss: -0.01316325060195393
          total_loss: -0.014735568480359184
          vf_explained_var: 0.528319776058197
          vf_loss: 0.008378963997691042
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,138,3894.07,138000,-2.8335,-2.43,-3.33,283.35


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-23_20-50-23
  done: false
  episode_len_mean: 283.22
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.8321999999999834
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 382
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4414062499999995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.029045448700587
          entropy_coeff: 0.009999999999999998
          kl: 0.008114386512434476
          policy_loss: 0.005348288102282418
          total_loss: 0.007305599169598685
          vf_explained_var: 0.42727237939834595
          vf_loss: 0.012247570055640407
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,139,3928.96,139000,-2.8322,-2.43,-3.33,283.22


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-23_20-50-58
  done: false
  episode_len_mean: 283.25
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.832499999999983
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 385
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4414062499999995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9733583039707607
          entropy_coeff: 0.009999999999999998
          kl: 0.005971279789129744
          policy_loss: -0.08449785924620098
          total_loss: -0.08197464396556219
          vf_explained_var: 0.4190077483654022
          vf_loss: 0.012256655873109897
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,140,3964.1,140000,-2.8325,-2.43,-3.33,283.25


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-23_20-51-31
  done: false
  episode_len_mean: 283.16
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.831599999999984
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 389
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4414062499999995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.19374977350235
          entropy_coeff: 0.009999999999999998
          kl: 0.014758127081899235
          policy_loss: 0.03273546910948223
          total_loss: 0.03208401319053438
          vf_explained_var: 0.4842647612094879
          vf_loss: 0.011285683409207397
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 1410

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,141,3997.46,141000,-2.8316,-2.43,-3.33,283.16




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-23_20-52-25
  done: false
  episode_len_mean: 283.05
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.8304999999999843
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 392
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4414062499999995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9981088777383168
          entropy_coeff: 0.009999999999999998
          kl: 0.006893086262272716
          policy_loss: -0.1105089783668518
          total_loss: -0.10850081791480383
          vf_explained_var: 0.48961180448532104
          vf_loss: 0.011989083689534002
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,142,4050.78,142000,-2.8305,-2.43,-3.33,283.05


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-23_20-52-58
  done: false
  episode_len_mean: 283.36
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.8335999999999832
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 396
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4414062499999995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.1174957970778148
          entropy_coeff: 0.009999999999999998
          kl: 0.00516655007707993
          policy_loss: 0.05290031863583459
          total_loss: 0.05155102262894313
          vf_explained_var: 0.5419435501098633
          vf_loss: 0.009825533312848873
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,143,4083.84,143000,-2.8336,-2.43,-3.33,283.36


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-23_20-53-34
  done: false
  episode_len_mean: 283.38
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.833799999999984
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 399
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4414062499999995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9982407079802619
          entropy_coeff: 0.009999999999999998
          kl: 0.004833850991847655
          policy_loss: -0.11146353334188461
          total_loss: -0.10984368994832039
          vf_explained_var: 0.5132634043693542
          vf_loss: 0.011602138520942794
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,144,4119.6,144000,-2.8338,-2.43,-3.33,283.38


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-23_20-54-06
  done: false
  episode_len_mean: 284.35
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.8434999999999833
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 403
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2207031249999997e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0785119407706791
          entropy_coeff: 0.009999999999999998
          kl: 0.005008976682392764
          policy_loss: 0.04093268190821012
          total_loss: 0.04035544652077887
          vf_explained_var: 0.5434441566467285
          vf_loss: 0.010207818696896236
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,145,4151.91,145000,-2.8435,-2.55,-3.33,284.35


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-23_20-54-41
  done: false
  episode_len_mean: 284.37
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.8436999999999824
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 406
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2207031249999997e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8548009335994721
          entropy_coeff: 0.009999999999999998
          kl: 0.0068077180613358755
          policy_loss: -0.10355153133471807
          total_loss: -0.10043562476833662
          vf_explained_var: 0.47991448640823364
          vf_loss: 0.011663831853204304
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,146,4187.02,146000,-2.8437,-2.55,-3.33,284.37


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-23_20-55-15
  done: false
  episode_len_mean: 284.32
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.843199999999983
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 410
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2207031249999997e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8326170265674591
          entropy_coeff: 0.009999999999999998
          kl: 0.00294246937628052
          policy_loss: 0.03263414576649666
          total_loss: 0.0361057761642668
          vf_explained_var: 0.431357204914093
          vf_loss: 0.011797771789133549
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 1470

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,147,4221.21,147000,-2.8432,-2.55,-3.33,284.32


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-23_20-55-50
  done: false
  episode_len_mean: 284.15
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.8414999999999835
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 414
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.103515624999999e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8466679824723138
          entropy_coeff: 0.009999999999999998
          kl: 0.005617746481387363
          policy_loss: 0.020825385881794823
          total_loss: 0.022434148854679533
          vf_explained_var: 0.5027387142181396
          vf_loss: 0.01007541493098769
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,148,4255.64,148000,-2.8415,-2.55,-3.33,284.15


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-23_20-56-24
  done: false
  episode_len_mean: 283.77
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -2.837699999999984
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 417
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.103515624999999e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8194616662131415
          entropy_coeff: 0.009999999999999998
          kl: 0.005100386791795586
          policy_loss: 0.025871543751822577
          total_loss: 0.026928832630316417
          vf_explained_var: 0.4632547199726105
          vf_loss: 0.009251881126935283
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,149,4290.26,149000,-2.8377,-2.55,-3.33,283.77




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-23_20-57-19
  done: false
  episode_len_mean: 282.62
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.826199999999983
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 421
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.103515624999999e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8803310016791026
          entropy_coeff: 0.009999999999999998
          kl: 0.00639419656737086
          policy_loss: 0.01668236955172486
          total_loss: 0.02051345482468605
          vf_explained_var: 0.40551719069480896
          vf_loss: 0.012634360883384942
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,150,4344.91,150000,-2.8262,-2.39,-3.33,282.62


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-23_20-57-52
  done: false
  episode_len_mean: 282.08
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.8207999999999833
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 425
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.103515624999999e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.121380211247338
          entropy_coeff: 0.009999999999999998
          kl: 0.007292591994450288
          policy_loss: -0.01521387245092127
          total_loss: -0.01414076027770837
          vf_explained_var: 0.4785309135913849
          vf_loss: 0.01228687535557482
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,151,4377.64,151000,-2.8208,-2.39,-3.33,282.08


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-23_20-58-24
  done: false
  episode_len_mean: 282.54
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.8253999999999837
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 428
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.103515624999999e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4802750057644314
          entropy_coeff: 0.009999999999999998
          kl: 0.011056762570985276
          policy_loss: 0.02076067907942666
          total_loss: 0.013996441000037723
          vf_explained_var: 0.6810218095779419
          vf_loss: 0.008038448015900536
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,152,4409.47,152000,-2.8254,-2.39,-3.33,282.54


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-23_20-58-55
  done: false
  episode_len_mean: 283.07
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.8306999999999833
  episode_reward_min: -3.2499999999999747
  episodes_this_iter: 3
  episodes_total: 431
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.103515624999999e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.492064082622528
          entropy_coeff: 0.009999999999999998
          kl: 0.02727181914111879
          policy_loss: 0.038672267480029
          total_loss: 0.03253387047184838
          vf_explained_var: 0.5777721405029297
          vf_loss: 0.008782080840319395
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 15300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,153,4441.12,153000,-2.8307,-2.39,-3.25,283.07


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-23_20-59-30
  done: false
  episode_len_mean: 282.86
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.828599999999983
  episode_reward_min: -3.2499999999999747
  episodes_this_iter: 4
  episodes_total: 435
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.1552734375e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1545261078410678
          entropy_coeff: 0.009999999999999998
          kl: 0.01713055703611938
          policy_loss: -0.0074344423082139755
          total_loss: -0.0067059030135472614
          vf_explained_var: 0.4937335252761841
          vf_loss: 0.012273650543971195
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,154,4475.75,154000,-2.8286,-2.39,-3.25,282.86


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-23_21-00-01
  done: false
  episode_len_mean: 283.24
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.832399999999983
  episode_reward_min: -3.2499999999999747
  episodes_this_iter: 3
  episodes_total: 438
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.1552734375e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.5989307694964938
          entropy_coeff: 0.009999999999999998
          kl: 0.015377903562008853
          policy_loss: 0.027444574568006728
          total_loss: 0.019204015036424
          vf_explained_var: 0.6849798560142517
          vf_loss: 0.007748607689023225
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,155,4507.11,155000,-2.8324,-2.39,-3.25,283.24


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-23_21-00-35
  done: false
  episode_len_mean: 283.8
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.837999999999983
  episode_reward_min: -3.2499999999999747
  episodes_this_iter: 3
  episodes_total: 441
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.1552734375e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4075118674172296
          entropy_coeff: 0.009999999999999998
          kl: 0.009255638915295784
          policy_loss: -0.053690113416976395
          total_loss: -0.05757428854703903
          vf_explained_var: 0.6933321952819824
          vf_loss: 0.010190859923346176
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 15600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,156,4540.31,156000,-2.838,-2.39,-3.25,283.8


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-23_21-01-05
  done: false
  episode_len_mean: 284.97
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.849699999999982
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 3
  episodes_total: 444
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.1552734375e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.7903712140189276
          entropy_coeff: 0.009999999999999998
          kl: 0.02462240391108226
          policy_loss: -0.09965616568095154
          total_loss: -0.110416480857465
          vf_explained_var: 0.8127371072769165
          vf_loss: 0.007143173465091321
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,157,4570.6,157000,-2.8497,-2.39,-3.28,284.97


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-23_21-01-34
  done: false
  episode_len_mean: 286.57
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.865699999999983
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 3
  episodes_total: 447
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3732910156250005e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.6926487671004402
          entropy_coeff: 0.009999999999999998
          kl: 0.016602096840959903
          policy_loss: -0.019549724459648133
          total_loss: -0.030619422760274677
          vf_explained_var: 0.8775721192359924
          vf_loss: 0.005856565639583601
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,158,4599.43,158000,-2.8657,-2.39,-3.51,286.57




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-23_21-02-24
  done: false
  episode_len_mean: 287.96
  episode_media: {}
  episode_reward_max: -0.05000000000000231
  episode_reward_mean: -2.8492999999999826
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 4
  episodes_total: 451
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3732910156250005e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.6324779431025187
          entropy_coeff: 0.009999999999999998
          kl: 0.01975761071489525
          policy_loss: -0.08191949625809987
          total_loss: -0.0280760467880302
          vf_explained_var: 0.55033940076828
          vf_loss: 0.07016795857602523
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,159,4649.63,159000,-2.8493,-0.05,-3.51,287.96


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-23_21-02-56
  done: false
  episode_len_mean: 288.24
  episode_media: {}
  episode_reward_max: -0.05000000000000231
  episode_reward_mean: -2.8520999999999828
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 3
  episodes_total: 454
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3732910156250005e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.6740461150805155
          entropy_coeff: 0.009999999999999998
          kl: 0.011952111454866474
          policy_loss: -0.11398539323773649
          total_loss: -0.11876474426438412
          vf_explained_var: 0.7207739949226379
          vf_loss: 0.011960945591434008
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,160,4681.58,160000,-2.8521,-0.05,-3.51,288.24


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-23_21-03-33
  done: false
  episode_len_mean: 288.69
  episode_media: {}
  episode_reward_max: -0.010000000000002307
  episode_reward_mean: -2.836199999999982
  episode_reward_min: -3.9199999999999595
  episodes_this_iter: 3
  episodes_total: 457
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3732910156250005e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.228187574280633
          entropy_coeff: 0.009999999999999998
          kl: 0.011336155759983058
          policy_loss: -0.08480489146378305
          total_loss: 0.00015027709305286406
          vf_explained_var: 0.4136126935482025
          vf_loss: 0.09723689134957061
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,161,4718.27,161000,-2.8362,-0.01,-3.92,288.69


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-23_21-04-06
  done: false
  episode_len_mean: 289.14
  episode_media: {}
  episode_reward_max: 1.130000000000025
  episode_reward_mean: -2.810199999999982
  episode_reward_min: -3.9199999999999595
  episodes_this_iter: 4
  episodes_total: 461
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3732910156250005e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4272215525309244
          entropy_coeff: 0.009999999999999998
          kl: 0.010785974032455946
          policy_loss: -0.069745624727673
          total_loss: 0.07002708295153247
          vf_explained_var: 0.2176138162612915
          vf_loss: 0.15404477778615222
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 1620

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,162,4751.94,162000,-2.8102,1.13,-3.92,289.14


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-23_21-04-40
  done: false
  episode_len_mean: 289.55
  episode_media: {}
  episode_reward_max: 1.9900000000000404
  episode_reward_mean: -2.7927999999999815
  episode_reward_min: -6.839999999999961
  episodes_this_iter: 3
  episodes_total: 464
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3732910156250005e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.6031615283754137
          entropy_coeff: 0.009999999999999998
          kl: 0.014439007899354945
          policy_loss: 0.1372931925786866
          total_loss: 0.42180516918500266
          vf_explained_var: 0.24463015794754028
          vf_loss: 0.3005433910836776
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,163,4785.63,163000,-2.7928,1.99,-6.84,289.55


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-23_21-05-14
  done: false
  episode_len_mean: 289.73
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.8006999999999804
  episode_reward_min: -12.94999999999995
  episodes_this_iter: 3
  episodes_total: 467
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3732910156250005e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4081464277373419
          entropy_coeff: 0.009999999999999998
          kl: 0.02435766332787832
          policy_loss: 0.004251981816358037
          total_loss: 0.6614922708935208
          vf_explained_var: 0.3941268026828766
          vf_loss: 0.6713214175568687
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 16400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,164,4819.83,164000,-2.8007,5.14,-12.95,289.73


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-23_21-05-48
  done: false
  episode_len_mean: 290.94
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.778599999999981
  episode_reward_min: -12.94999999999995
  episodes_this_iter: 4
  episodes_total: 471
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0599365234374995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.655023729801178
          entropy_coeff: 0.009999999999999998
          kl: 0.023019411809865806
          policy_loss: -0.014559244447284275
          total_loss: 0.9201898902654648
          vf_explained_var: 0.6911690831184387
          vf_loss: 0.9512989044189453
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 16500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,165,4853.76,165000,-2.7786,5.14,-12.95,290.94


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-23_21-06-16
  done: false
  episode_len_mean: 293.72
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.81279999999998
  episode_reward_min: -12.94999999999995
  episodes_this_iter: 2
  episodes_total: 473
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.08990478515625e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.18338414033254
          entropy_coeff: 0.009999999999999998
          kl: 0.019499213920965947
          policy_loss: -0.04436359682844745
          total_loss: 0.3624800162182914
          vf_explained_var: 0.7300177812576294
          vf_loss: 0.4286768512593375
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,166,4880.99,166000,-2.8128,5.14,-12.95,293.72


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-23_21-06-44
  done: false
  episode_len_mean: 296.79
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.8495999999999797
  episode_reward_min: -12.94999999999995
  episodes_this_iter: 3
  episodes_total: 476
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.08990478515625e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.170417000187768
          entropy_coeff: 0.009999999999999998
          kl: 0.021282693707756154
          policy_loss: -0.010070914568172561
          total_loss: 0.5019467497865359
          vf_explained_var: 0.727685809135437
          vf_loss: 0.5337211754586961
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,167,4909.15,167000,-2.8496,5.14,-12.95,296.79


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-23_21-07-13
  done: false
  episode_len_mean: 298.82
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.8653999999999793
  episode_reward_min: -12.94999999999995
  episodes_this_iter: 2
  episodes_total: 478
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.634857177734377e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.142497566011217
          entropy_coeff: 0.009999999999999998
          kl: 0.019964642372510423
          policy_loss: -0.07920310679409238
          total_loss: 0.512857964883248
          vf_explained_var: 0.5569603443145752
          vf_loss: 0.6134851232171059
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,168,4938.04,168000,-2.8654,5.14,-12.95,298.82




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-23_21-08-00
  done: false
  episode_len_mean: 301.36
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.8444999999999787
  episode_reward_min: -12.94999999999995
  episodes_this_iter: 3
  episodes_total: 481
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.634857177734377e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.258009452290005
          entropy_coeff: 0.009999999999999998
          kl: 0.015637014326936496
          policy_loss: -0.15188827796114815
          total_loss: 0.25030273869633674
          vf_explained_var: 0.5840619802474976
          vf_loss: 0.4247703790664673
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 16900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,169,4985.12,169000,-2.8445,5.14,-12.95,301.36


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-23_21-08-28
  done: false
  episode_len_mean: 304.4
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.9675999999999774
  episode_reward_min: -12.94999999999995
  episodes_this_iter: 3
  episodes_total: 484
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.634857177734377e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.163489744398329
          entropy_coeff: 0.009999999999999998
          kl: 0.025200680899626595
          policy_loss: -0.03150860551330778
          total_loss: 0.6687155942949984
          vf_explained_var: 0.44684427976608276
          vf_loss: 0.721857926580641
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,170,5013.28,170000,-2.9676,5.14,-12.95,304.4


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-23_21-08-53
  done: false
  episode_len_mean: 307.23
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.9914999999999763
  episode_reward_min: -12.94999999999995
  episodes_this_iter: 2
  episodes_total: 486
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.95228576660156e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.227731606695387
          entropy_coeff: 0.009999999999999998
          kl: 0.012040159775065017
          policy_loss: 0.09870593970020612
          total_loss: 0.4014350694086817
          vf_explained_var: 0.6259739995002747
          vf_loss: 0.3250056081347995
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,171,5038.13,171000,-2.9915,5.14,-12.95,307.23


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-23_21-09-20
  done: false
  episode_len_mean: 310.94
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -3.142499999999976
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 489
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.95228576660156e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.2069132698906793
          entropy_coeff: 0.009999999999999998
          kl: 0.02016214612311183
          policy_loss: -0.14660294577479363
          total_loss: 0.322786722994513
          vf_explained_var: 0.5421581268310547
          vf_loss: 0.4914574020438724
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,172,5064.92,172000,-3.1425,5.14,-17.55,310.94


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-23_21-09-46
  done: false
  episode_len_mean: 313.63
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -3.1248999999999763
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 2
  episodes_total: 491
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.340391770998637
          entropy_coeff: 0.009999999999999998
          kl: 0.013836798367225513
          policy_loss: -0.008053399187823137
          total_loss: 0.45841788889633284
          vf_explained_var: 0.3683356046676636
          vf_loss: 0.4898737594485283
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,173,5091.57,173000,-3.1249,5.14,-17.55,313.63


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-23_21-10-10
  done: false
  episode_len_mean: 316.23
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -3.039399999999975
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 2
  episodes_total: 493
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.2673724518881904
          entropy_coeff: 0.009999999999999998
          kl: 0.012208108649702294
          policy_loss: -0.1481888903511895
          total_loss: -0.03018768255909284
          vf_explained_var: 0.9031509160995483
          vf_loss: 0.1406736571341753
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,174,5115.54,174000,-3.0394,5.14,-17.55,316.23


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-23_21-10-38
  done: false
  episode_len_mean: 319.64
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.979299999999973
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 496
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.2454637580447727
          entropy_coeff: 0.009999999999999998
          kl: 0.01963621514898556
          policy_loss: -0.08880255714886719
          total_loss: 0.12360305823385716
          vf_explained_var: 0.5949297547340393
          vf_loss: 0.2348582059972816
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 1750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,175,5142.84,175000,-2.9793,5.14,-17.55,319.64


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-23_21-11-07
  done: false
  episode_len_mean: 322.75
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.9929999999999732
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 499
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.273485853936937
          entropy_coeff: 0.009999999999999998
          kl: 0.013754736601652004
          policy_loss: -0.02939257820447286
          total_loss: 0.4148647008670701
          vf_explained_var: 0.7099465727806091
          vf_loss: 0.4669907015230921
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 1760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,176,5171.74,176000,-2.993,5.14,-17.55,322.75


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-23_21-11-34
  done: false
  episode_len_mean: 324.62
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.8901999999999726
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 2
  episodes_total: 501
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.2893944395913017
          entropy_coeff: 0.009999999999999998
          kl: 0.008556843554443697
          policy_loss: -0.10940224470363723
          total_loss: 0.06422823203934563
          vf_explained_var: 0.8405069708824158
          vf_loss: 0.19652353020177948
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,177,5198.83,177000,-2.8902,5.14,-17.55,324.62


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-23_21-12-00
  done: false
  episode_len_mean: 328.35
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.8444999999999716
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 504
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.232221841812134
          entropy_coeff: 0.009999999999999998
          kl: 0.012551946859083252
          policy_loss: 0.04302969210677677
          total_loss: 0.1950057026412752
          vf_explained_var: 0.668362021446228
          vf_loss: 0.17429692470985983
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 17800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,178,5225.37,178000,-2.8445,5.14,-17.55,328.35


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-23_21-12-30
  done: false
  episode_len_mean: 329.57
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.7651999999999703
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 2
  episodes_total: 506
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.135526998837789
          entropy_coeff: 0.009999999999999998
          kl: 0.008853672497034035
          policy_loss: -0.17549099483423763
          total_loss: 0.2306461897989114
          vf_explained_var: 0.8901785016059875
          vf_loss: 0.42749152415328556
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,179,5254.61,179000,-2.7652,5.14,-17.55,329.57


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-23_21-12-57
  done: false
  episode_len_mean: 332.29
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.7278999999999685
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 509
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.158736801147461
          entropy_coeff: 0.009999999999999998
          kl: 0.008841052576846285
          policy_loss: -0.0181614614609215
          total_loss: 0.03943174526923233
          vf_explained_var: 0.6441906690597534
          vf_loss: 0.07917965137296253
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,180,5281.99,180000,-2.7279,5.14,-17.55,332.29




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-23_21-13-41
  done: false
  episode_len_mean: 335.55
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.587599999999967
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 512
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.2710063616434732
          entropy_coeff: 0.009999999999999998
          kl: 0.008259669278860847
          policy_loss: -0.006067297524876065
          total_loss: 0.1534433269666301
          vf_explained_var: 0.8393402695655823
          vf_loss: 0.1822198267922633
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,181,5325.98,181000,-2.5876,5.14,-17.55,335.55


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-23_21-14-07
  done: false
  episode_len_mean: 337.54
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.466599999999966
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 2
  episodes_total: 514
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.286147379875183
          entropy_coeff: 0.009999999999999998
          kl: 0.009197345350410677
          policy_loss: 0.028468621232443387
          total_loss: 0.05458464783926805
          vf_explained_var: 0.667973518371582
          vf_loss: 0.04897653909607066
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 1820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,182,5352.05,182000,-2.4666,5.14,-17.55,337.54


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-23_21-14-34
  done: false
  episode_len_mean: 340.81
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.3672999999999647
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 517
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00010428428649902343
          cur_lr: 5.000000000000001e-05
          entropy: 2.1887316624323527
          entropy_coeff: 0.009999999999999998
          kl: 0.021629483294293077
          policy_loss: 0.03237164095044136
          total_loss: 0.37523557961814935
          vf_explained_var: 0.4271693825721741
          vf_loss: 0.36474899573044645
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,183,5379.44,183000,-2.3673,5.14,-17.55,340.81


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-23_21-15-01
  done: false
  episode_len_mean: 343.2
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.2887999999999638
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 2
  episodes_total: 519
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001564264297485352
          cur_lr: 5.000000000000001e-05
          entropy: 2.145202128092448
          entropy_coeff: 0.009999999999999998
          kl: 0.008527028899606595
          policy_loss: -0.1645810615685251
          total_loss: -0.011178689532809788
          vf_explained_var: 0.9083503484725952
          vf_loss: 0.17485305770403808
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,184,5406.42,184000,-2.2888,5.14,-17.55,343.2


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-23_21-15-28
  done: false
  episode_len_mean: 347.18
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.2441999999999624
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 522
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001564264297485352
          cur_lr: 5.000000000000001e-05
          entropy: 2.2368374188741047
          entropy_coeff: 0.009999999999999998
          kl: 0.01334640211383918
          policy_loss: 0.09934771872229047
          total_loss: 0.29432862218883304
          vf_explained_var: 0.8607624769210815
          vf_loss: 0.21734719168808725
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 1850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,185,5433.03,185000,-2.2442,5.14,-17.55,347.18


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-23_21-15-53
  done: false
  episode_len_mean: 349.83
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.1586999999999614
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 2
  episodes_total: 524
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001564264297485352
          cur_lr: 5.000000000000001e-05
          entropy: 2.3126681751675076
          entropy_coeff: 0.009999999999999998
          kl: 0.022209773416866932
          policy_loss: -0.039041568256086776
          total_loss: 0.08232333999541071
          vf_explained_var: 0.8914930820465088
          vf_loss: 0.14448811478084989
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,186,5457.73,186000,-2.1587,5.14,-17.55,349.83


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-23_21-16-19
  done: false
  episode_len_mean: 352.41
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.1311999999999602
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 527
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.2256024016274347
          entropy_coeff: 0.009999999999999998
          kl: 0.015309936172273374
          policy_loss: -0.08574139144685533
          total_loss: 0.12911696640981568
          vf_explained_var: 0.7263476252555847
          vf_loss: 0.23711078754729695
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,187,5483.7,187000,-2.1312,5.14,-17.55,352.41


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-23_21-16-43
  done: false
  episode_len_mean: 355.14
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -2.036099999999959
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 2
  episodes_total: 529
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.2650026109483505
          entropy_coeff: 0.009999999999999998
          kl: 0.009545636737370383
          policy_loss: -0.16205463463233577
          total_loss: -0.08102078032162455
          vf_explained_var: 0.9300705194473267
          vf_loss: 0.10368164305885633
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,188,5507.79,188000,-2.0361,5.14,-17.55,355.14


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-23_21-17-10
  done: false
  episode_len_mean: 358.29
  episode_media: {}
  episode_reward_max: 5.140000000000036
  episode_reward_mean: -1.9054999999999571
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 532
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.065948634677463
          entropy_coeff: 0.009999999999999998
          kl: 0.010275334894681245
          policy_loss: 0.056790202442142695
          total_loss: 0.07832690328359604
          vf_explained_var: 0.963187575340271
          vf_loss: 0.042193776244918504
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,189,5535.3,189000,-1.9055,5.14,-17.55,358.29




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-23_21-18-08
  done: false
  episode_len_mean: 358.41
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: -1.6841999999999562
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 535
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.1319031185574002
          entropy_coeff: 0.009999999999999998
          kl: 0.012016317514404937
          policy_loss: -0.027953410893678664
          total_loss: 0.2537827313774162
          vf_explained_var: 0.4642668068408966
          vf_loss: 0.3030523467395041
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,190,5592.34,190000,-1.6842,9.83,-17.55,358.41


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-23_21-18-30
  done: false
  episode_len_mean: 361.5
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: -1.6231999999999547
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 2
  episodes_total: 537
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.2585471921496922
          entropy_coeff: 0.009999999999999998
          kl: 0.01364848854363797
          policy_loss: 0.028886329548226463
          total_loss: 0.2897060186498695
          vf_explained_var: 0.2186102569103241
          vf_loss: 0.28340195847882166
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 1910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,191,5614.83,191000,-1.6232,9.83,-17.55,361.5




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-23_21-19-36
  done: false
  episode_len_mean: 361.8
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: -1.5221999999999545
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 4
  episodes_total: 541
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.1226214210192365
          entropy_coeff: 0.009999999999999998
          kl: 0.011821567892069713
          policy_loss: 0.11560804636942015
          total_loss: 0.5437354048921003
          vf_explained_var: 0.5227752327919006
          vf_loss: 0.4493507843050692
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 19200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,192,5681.17,192000,-1.5222,9.83,-17.55,361.8




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-23_21-20-43
  done: false
  episode_len_mean: 360.97
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: -1.439999999999954
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 544
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.2463506089316474
          entropy_coeff: 0.009999999999999998
          kl: 0.015580387560224215
          policy_loss: 0.19161078267627293
          total_loss: 0.4339534211282929
          vf_explained_var: 0.7491140365600586
          vf_loss: 0.2648024895124965
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 19300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,193,5747.77,193000,-1.44,9.83,-17.55,360.97


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-23_21-21-15
  done: false
  episode_len_mean: 361.07
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: -1.377399999999953
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 547
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.100402002864414
          entropy_coeff: 0.009999999999999998
          kl: 0.012449790051811589
          policy_loss: 0.07725811186763976
          total_loss: 0.3238889319201311
          vf_explained_var: 0.6816869378089905
          vf_loss: 0.2676319206754367
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,194,5779.87,194000,-1.3774,9.83,-17.55,361.07




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-23_21-22-02
  done: false
  episode_len_mean: 359.52
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: -1.1389999999999516
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 4
  episodes_total: 551
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.213673135969374
          entropy_coeff: 0.009999999999999998
          kl: 0.011114096513302465
          policy_loss: -0.048851247959666784
          total_loss: 0.42414777394798064
          vf_explained_var: 0.7969878911972046
          vf_loss: 0.495133144987954
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,195,5826.42,195000,-1.139,9.87,-17.55,359.52


Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-23_21-22-32
  done: false
  episode_len_mean: 360.61
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: -1.076799999999951
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 554
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.145713872379727
          entropy_coeff: 0.009999999999999998
          kl: 0.012946597442134037
          policy_loss: -0.07241265902088748
          total_loss: 0.16084849884112676
          vf_explained_var: 0.25786706805229187
          vf_loss: 0.2547152601182461
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,196,5856.82,196000,-1.0768,9.87,-17.55,360.61




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-23_21-25-14
  done: false
  episode_len_mean: 348.45
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: -0.46139999999995124
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 7
  episodes_total: 561
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.9868686172697279
          entropy_coeff: 0.009999999999999998
          kl: 0.010770091599761104
          policy_loss: -0.08007577508687973
          total_loss: 0.6983074559105767
          vf_explained_var: 0.6820271611213684
          vf_loss: 0.7982493920458688
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,197,6018.15,197000,-0.4614,9.87,-17.55,348.45




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-23_21-27-16
  done: false
  episode_len_mean: 342.59
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 0.04570000000004902
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 6
  episodes_total: 567
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.9933779875437418
          entropy_coeff: 0.009999999999999998
          kl: 0.011511207897614244
          policy_loss: -0.04364959034654829
          total_loss: 0.526906614502271
          vf_explained_var: 0.9197143316268921
          vf_loss: 0.5904872824748357
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 1980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,198,6140.29,198000,0.0457,9.87,-17.55,342.59




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-23_21-28-29
  done: false
  episode_len_mean: 339.9
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 0.14230000000004836
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 3
  episodes_total: 570
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.923460070292155
          entropy_coeff: 0.009999999999999998
          kl: 0.012276125898168842
          policy_loss: -0.012769325574239095
          total_loss: 0.3508809619479709
          vf_explained_var: 0.7758435606956482
          vf_loss: 0.3828820081220733
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 1990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,199,6213.65,199000,0.1423,9.87,-17.55,339.9




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-23_21-30-06
  done: false
  episode_len_mean: 329.5
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 0.7316000000000484
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 6
  episodes_total: 576
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.1272336231337654
          entropy_coeff: 0.009999999999999998
          kl: 0.014285732910615481
          policy_loss: -0.1234145758052667
          total_loss: 0.21762380252281824
          vf_explained_var: 0.28377020359039307
          vf_loss: 0.36230736374855044
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,200,6310.81,200000,0.7316,9.87,-17.55,329.5




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-23_21-32-00
  done: false
  episode_len_mean: 317.62
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 1.2607000000000468
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 6
  episodes_total: 582
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.1007375836372377
          entropy_coeff: 0.009999999999999998
          kl: 0.009315729289143506
          policy_loss: 0.041377445889843836
          total_loss: 0.3825796961784363
          vf_explained_var: 0.2841889560222626
          vf_loss: 0.36220743540260525
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,201,6424.26,201000,1.2607,9.87,-17.55,317.62




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-23_21-33-20
  done: false
  episode_len_mean: 309.76
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 1.6536000000000461
  episode_reward_min: -17.550000000000065
  episodes_this_iter: 4
  episodes_total: 586
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.08216839366489
          entropy_coeff: 0.009999999999999998
          kl: 0.012125372538746898
          policy_loss: -0.05179716265863842
          total_loss: 0.3165338936779234
          vf_explained_var: 0.7679511904716492
          vf_loss: 0.3891498984562026
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,202,6504.25,202000,1.6536,9.87,-17.55,309.76




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-23_21-38-08
  done: false
  episode_len_mean: 265.7
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 3.1679000000000412
  episode_reward_min: -11.529999999999928
  episodes_this_iter: 14
  episodes_total: 600
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.403108505407969
          entropy_coeff: 0.009999999999999998
          kl: 0.013214262961980723
          policy_loss: -0.18217792842123243
          total_loss: 0.4710430810848872
          vf_explained_var: 0.9365185499191284
          vf_loss: 0.6672489908006456
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,203,6792.33,203000,3.1679,9.9,-11.53,265.7




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-23_21-40-25
  done: false
  episode_len_mean: 251.47
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 3.5590000000000397
  episode_reward_min: -11.529999999999928
  episodes_this_iter: 6
  episodes_total: 606
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.9703303707970514
          entropy_coeff: 0.009999999999999998
          kl: 0.010482035541164001
          policy_loss: 0.07012307213412391
          total_loss: 0.4441275811029805
          vf_explained_var: 0.6932211518287659
          vf_loss: 0.39370535446537863
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,204,6929.87,204000,3.559,9.9,-11.53,251.47




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-23_21-44-07
  done: false
  episode_len_mean: 220.42
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 4.296100000000034
  episode_reward_min: -10.259999999999923
  episodes_this_iter: 11
  episodes_total: 617
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.4449400617016686
          entropy_coeff: 0.009999999999999998
          kl: 0.00922859213542044
          policy_loss: -0.08635116500986947
          total_loss: 0.32329024577306376
          vf_explained_var: 0.5456183552742004
          vf_loss: 0.42408863951762515
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,205,7151.49,205000,4.2961,9.9,-10.26,220.42




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-23_21-46-50
  done: false
  episode_len_mean: 201.38
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 4.702600000000031
  episode_reward_min: -10.259999999999923
  episodes_this_iter: 7
  episodes_total: 624
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.8479074557622275
          entropy_coeff: 0.009999999999999998
          kl: 0.008641582810016542
          policy_loss: 0.05757935245831807
          total_loss: 0.46248715179454947
          vf_explained_var: 0.930167555809021
          vf_loss: 0.4233848365644614
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,206,7314.6,206000,4.7026,9.92,-10.26,201.38




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-23_21-51-19
  done: false
  episode_len_mean: 161.12
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 5.640300000000024
  episode_reward_min: -10.259999999999923
  episodes_this_iter: 13
  episodes_total: 637
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.8753045373492772
          entropy_coeff: 0.009999999999999998
          kl: 0.015073288873997113
          policy_loss: -0.18166353404521943
          total_loss: 0.4177710900704066
          vf_explained_var: 0.36157354712486267
          vf_loss: 0.6181841308871905
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,207,7583.33,207000,5.6403,9.92,-10.26,161.12




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-23_21-57-51
  done: false
  episode_len_mean: 116.3
  episode_media: {}
  episode_reward_max: 9.920000000000002
  episode_reward_mean: 7.199400000000018
  episode_reward_min: -6.349999999999955
  episodes_this_iter: 19
  episodes_total: 656
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.9260768161879644
          entropy_coeff: 0.009999999999999998
          kl: 0.01857156476964878
          policy_loss: 0.1455887140499221
          total_loss: 0.5220456113417943
          vf_explained_var: 0.9548944234848022
          vf_loss: 0.39571330580446457
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,208,7975.28,208000,7.1994,9.92,-6.35,116.3




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-23_22-02-09
  done: false
  episode_len_mean: 106.97
  episode_media: {}
  episode_reward_max: 9.920000000000002
  episode_reward_mean: 7.412800000000015
  episode_reward_min: -6.349999999999955
  episodes_this_iter: 13
  episodes_total: 669
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.8666565312279595
          entropy_coeff: 0.009999999999999998
          kl: 0.012611360034385724
          policy_loss: -0.1265413729680909
          total_loss: 0.5195617288351059
          vf_explained_var: 0.9451103806495667
          vf_loss: 0.6647667007313834
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,209,8232.85,209000,7.4128,9.92,-6.35,106.97




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-23_22-06-12
  done: false
  episode_len_mean: 90.61
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 7.8745000000000145
  episode_reward_min: -6.349999999999955
  episodes_this_iter: 12
  episodes_total: 681
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.1319461782773335
          entropy_coeff: 0.009999999999999998
          kl: 0.01234258680754553
          policy_loss: 0.0057180222537782455
          total_loss: 0.24485843856301573
          vf_explained_var: 0.6254619359970093
          vf_loss: 0.2604569810960028
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,210,8475.95,210000,7.8745,9.93,-6.35,90.61




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-23_22-11-15
  done: false
  episode_len_mean: 86.62
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 7.914300000000012
  episode_reward_min: -6.349999999999955
  episodes_this_iter: 14
  episodes_total: 695
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.4584123465749952
          entropy_coeff: 0.009999999999999998
          kl: 0.01684644817605538
          policy_loss: 0.03130647771888309
          total_loss: 0.9256586124499638
          vf_explained_var: 0.9142425656318665
          vf_loss: 0.9089323023955027
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,211,8779.49,211000,7.9143,9.93,-6.35,86.62




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-23_22-13-15
  done: false
  episode_len_mean: 86.05
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 7.929700000000013
  episode_reward_min: -6.349999999999955
  episodes_this_iter: 6
  episodes_total: 701
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.1599000334739684
          entropy_coeff: 0.009999999999999998
          kl: 0.015000025198013205
          policy_loss: -0.028348036772674983
          total_loss: 0.46036309632990097
          vf_explained_var: 0.44487571716308594
          vf_loss: 0.5103066146373749
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 2120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,212,8899.07,212000,7.9297,9.94,-6.35,86.05




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-23_22-17-11
  done: false
  episode_len_mean: 82.29
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 7.928500000000011
  episode_reward_min: -6.349999999999955
  episodes_this_iter: 10
  episodes_total: 711
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.8945344673262703
          entropy_coeff: 0.009999999999999998
          kl: 0.010815200475052677
          policy_loss: -0.077765863471561
          total_loss: 0.5967002282540004
          vf_explained_var: 0.6744087338447571
          vf_loss: 0.6934088902340995
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,213,9135.61,213000,7.9285,9.94,-6.35,82.29




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-23_22-26-27
  done: false
  episode_len_mean: 68.41
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 8.225900000000008
  episode_reward_min: -5.109999999999947
  episodes_this_iter: 27
  episodes_total: 738
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 0.760629743999905
          entropy_coeff: 0.009999999999999998
          kl: 0.006603608725905226
          policy_loss: -0.05232590263088544
          total_loss: 0.9284400920901034
          vf_explained_var: 0.9370135068893433
          vf_loss: 0.9883707526657316
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,214,9690.84,214000,8.2259,9.94,-5.11,68.41




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-23_22-39-59
  done: false
  episode_len_mean: 50.39
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 8.752600000000006
  episode_reward_min: -5.109999999999947
  episodes_this_iter: 40
  episodes_total: 778
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.1999010317855412
          entropy_coeff: 0.009999999999999998
          kl: 0.008945223117372928
          policy_loss: -0.0362746246986919
          total_loss: 0.41656385784347855
          vf_explained_var: 0.37114444375038147
          vf_loss: 0.46483539508448707
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 2150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,215,10503.4,215000,8.7526,9.94,-5.11,50.39




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-23_22-44-59
  done: false
  episode_len_mean: 53.49
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 8.790600000000007
  episode_reward_min: -5.109999999999947
  episodes_this_iter: 14
  episodes_total: 792
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 2.108937242296007
          entropy_coeff: 0.009999999999999998
          kl: 0.015824962639999225
          policy_loss: 0.10529365357425478
          total_loss: 0.44902627252870136
          vf_explained_var: 0.1266472488641739
          vf_loss: 0.3648182738158438
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,216,10803.5,216000,8.7906,9.94,-5.11,53.49




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-23_22-57-54
  done: false
  episode_len_mean: 36.24
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.330200000000003
  episode_reward_min: -0.33999999999991964
  episodes_this_iter: 38
  episodes_total: 830
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.4087916274865468
          entropy_coeff: 0.009999999999999998
          kl: 0.01009425514542072
          policy_loss: 0.17532012230820126
          total_loss: 0.6073270304335489
          vf_explained_var: 0.9620267152786255
          vf_loss: 0.44609245599971875
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 21700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,217,11578.5,217000,9.3302,9.94,-0.34,36.24




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-23_23-09-46
  done: false
  episode_len_mean: 32.74
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.395900000000001
  episode_reward_min: -7.289999999999935
  episodes_this_iter: 35
  episodes_total: 865
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.533068014515771
          entropy_coeff: 0.009999999999999998
          kl: 0.011312260293765301
          policy_loss: 0.10141057852241728
          total_loss: 0.5495521164602704
          vf_explained_var: 0.576428234577179
          vf_loss: 0.46346956193447114
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,218,12289.8,218000,9.3959,9.94,-7.29,32.74




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-23_23-25-41
  done: false
  episode_len_mean: 26.51
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.498800000000001
  episode_reward_min: -7.289999999999935
  episodes_this_iter: 46
  episodes_total: 911
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.233613861931695
          entropy_coeff: 0.009999999999999998
          kl: 0.012114555202581463
          policy_loss: 0.010555066499445173
          total_loss: 0.6503975960943434
          vf_explained_var: 0.9463294148445129
          vf_loss: 0.6521758251720005
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,219,13244.8,219000,9.4988,9.94,-7.29,26.51




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-23_23-38-34
  done: false
  episode_len_mean: 22.62
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.5184
  episode_reward_min: -7.979999999999939
  episodes_this_iter: 38
  episodes_total: 949
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.5197084923585256
          entropy_coeff: 0.009999999999999998
          kl: 0.011469699794970723
          policy_loss: -0.005524614618884193
          total_loss: 0.6488356414768431
          vf_explained_var: 0.5307630896568298
          vf_loss: 0.6695546466443274
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,220,14018.2,220000,9.5184,9.94,-7.98,22.62




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-23_23-48-53
  done: false
  episode_len_mean: 25.21
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.501700000000003
  episode_reward_min: -7.979999999999939
  episodes_this_iter: 30
  episodes_total: 979
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.2802790416611565
          entropy_coeff: 0.009999999999999998
          kl: 0.008146499333868679
          policy_loss: -0.05719970773077673
          total_loss: 0.312090934606062
          vf_explained_var: 0.6022082567214966
          vf_loss: 0.3820915169186062
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,221,14636.5,221000,9.5017,9.94,-7.98,25.21




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-24_00-12-08
  done: false
  episode_len_mean: 20.59
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.715700000000002
  episode_reward_min: 1.1000000000000691
  episodes_this_iter: 68
  episodes_total: 1047
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 0.5557803577846951
          entropy_coeff: 0.009999999999999998
          kl: 0.007399959243587754
          policy_loss: -0.03369577133821117
          total_loss: 0.0691984427264995
          vf_explained_var: 0.9899912476539612
          vf_loss: 0.10845028393798405
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 2220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,222,16032.3,222000,9.7157,9.94,1.1,20.59




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-24_00-25-48
  done: false
  episode_len_mean: 18.98
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.711500000000001
  episode_reward_min: -5.4799999999999125
  episodes_this_iter: 40
  episodes_total: 1087
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 1.1508510092894235
          entropy_coeff: 0.009999999999999998
          kl: 0.01367495933511166
          policy_loss: 0.0438562440375487
          total_loss: 0.3201247345242235
          vf_explained_var: 0.6826922297477722
          vf_loss: 0.28777379186617
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,223,16852.2,223000,9.7115,9.94,-5.48,18.98




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-24_00-41-17
  done: false
  episode_len_mean: 22.2
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.5012
  episode_reward_min: -9.799999999999953
  episodes_this_iter: 45
  episodes_total: 1132
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023463964462280265
          cur_lr: 5.000000000000001e-05
          entropy: 0.6656813492377599
          entropy_coeff: 0.009999999999999998
          kl: 0.004552802631267876
          policy_loss: -0.26878375858068465
          total_loss: -0.05413750807444254
          vf_explained_var: 0.9861242771148682
          vf_loss: 0.22130198892619873
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,224,17781.2,224000,9.5012,9.94,-9.8,22.2




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-24_00-57-54
  done: false
  episode_len_mean: 20.77
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.6247
  episode_reward_min: -9.799999999999953
  episodes_this_iter: 49
  episodes_total: 1181
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011731982231140132
          cur_lr: 5.000000000000001e-05
          entropy: 0.8183363603221046
          entropy_coeff: 0.009999999999999998
          kl: 0.0037391605009223067
          policy_loss: 0.038345351815223694
          total_loss: 0.2250489234096474
          vf_explained_var: 0.6929179430007935
          vf_loss: 0.19488649807042546
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,225,18777.5,225000,9.6247,9.94,-9.8,20.77




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-24_01-18-21
  done: false
  episode_len_mean: 17.75
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.833
  episode_reward_min: 4.850000000000065
  episodes_this_iter: 60
  episodes_total: 1241
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.865991115570066e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.663498310579194
          entropy_coeff: 0.009999999999999998
          kl: 0.004869333510762017
          policy_loss: -0.1452683062189155
          total_loss: 0.0823093579047256
          vf_explained_var: 0.9850636720657349
          vf_loss: 0.23421236558092964
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,226,20005.2,226000,9.833,9.94,4.85,17.75




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-24_01-27-16
  done: false
  episode_len_mean: 24.93
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.681600000000001
  episode_reward_min: -0.149999999999969
  episodes_this_iter: 27
  episodes_total: 1268
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.932995557785033e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2977517472373115
          entropy_coeff: 0.009999999999999998
          kl: 0.00845802754853179
          policy_loss: 0.08649353647811545
          total_loss: 0.41246618326339457
          vf_explained_var: 0.26440396904945374
          vf_loss: 0.33894991882973247
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 22700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,227,20539.9,227000,9.6816,9.94,-0.15,24.93




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-24_01-45-11
  done: false
  episode_len_mean: 24.57
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.6751
  episode_reward_min: -0.149999999999969
  episodes_this_iter: 53
  episodes_total: 1321
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.932995557785033e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7366250177224477
          entropy_coeff: 0.009999999999999998
          kl: 0.005789436100610492
          policy_loss: 0.1260036653942532
          total_loss: 0.26401676221026316
          vf_explained_var: 0.9364825487136841
          vf_loss: 0.14537917855713103
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,228,21614.8,228000,9.6751,9.94,-0.15,24.57




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-24_02-09-57
  done: false
  episode_len_mean: 13.54
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.9049
  episode_reward_min: 8.880000000000003
  episodes_this_iter: 73
  episodes_total: 1394
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.932995557785033e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.47089790503184
          entropy_coeff: 0.009999999999999998
          kl: 0.00901407848947253
          policy_loss: 0.026760050861371888
          total_loss: 0.08769444218940205
          vf_explained_var: 0.9941310286521912
          vf_loss: 0.06564310849126842
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,229,23100.4,229000,9.9049,9.94,8.88,13.54




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-24_02-35-24
  done: false
  episode_len_mean: 13.39
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.916099999999998
  episode_reward_min: 9.840000000000002
  episodes_this_iter: 75
  episodes_total: 1469
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.932995557785033e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.40498721831374696
          entropy_coeff: 0.009999999999999998
          kl: 0.009170128674534341
          policy_loss: -0.023302103868789142
          total_loss: 0.08554777020795477
          vf_explained_var: 0.9896326065063477
          vf_loss: 0.11289948082218568
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,230,24627.3,230000,9.9161,9.94,9.84,13.39




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-24_03-00-56
  done: false
  episode_len_mean: 13.16
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.9186
  episode_reward_min: 9.830000000000002
  episodes_this_iter: 76
  episodes_total: 1545
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.932995557785033e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3459425542089674
          entropy_coeff: 0.009999999999999998
          kl: 0.0037183995466616537
          policy_loss: -0.03269985591371854
          total_loss: -0.019662895198497506
          vf_explained_var: 0.9984441995620728
          vf_loss: 0.016496279204471245
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,231,26159.9,231000,9.9186,9.94,9.83,13.16




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-24_03-26-46
  done: false
  episode_len_mean: 13.06
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.909500000000001
  episode_reward_min: 8.820000000000004
  episodes_this_iter: 77
  episodes_total: 1622
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4664977788925165e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3305304318666458
          entropy_coeff: 0.009999999999999998
          kl: 0.006821419198687482
          policy_loss: 0.03406525839947992
          total_loss: 0.059712142611129414
          vf_explained_var: 0.9974610805511475
          vf_loss: 0.028952090297308233
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,232,27709.2,232000,9.9095,9.94,8.82,13.06




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-24_03-53-11
  done: false
  episode_len_mean: 12.83
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.921899999999999
  episode_reward_min: 9.88
  episodes_this_iter: 78
  episodes_total: 1700
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4664977788925165e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3415527754359775
          entropy_coeff: 0.009999999999999998
          kl: 0.001380184612124966
          policy_loss: -0.014308996881461805
          total_loss: 0.07000597163827883
          vf_explained_var: 0.9917638301849365
          vf_loss: 0.08773047753816678
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,233,29294.9,233000,9.9219,9.94,9.88,12.83




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-24_04-19-20
  done: false
  episode_len_mean: 13.08
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.909500000000001
  episode_reward_min: 8.850000000000001
  episodes_this_iter: 77
  episodes_total: 1777
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.332488894462583e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.3431713306241565
          entropy_coeff: 0.009999999999999998
          kl: 0.00396151318383648
          policy_loss: -0.009833649680432346
          total_loss: 0.07594802129185863
          vf_explained_var: 0.9923407435417175
          vf_loss: 0.08921335662404696
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 23400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,234,30863.3,234000,9.9095,9.94,8.85,13.08




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-24_04-45-41
  done: false
  episode_len_mean: 13.01
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.91
  episode_reward_min: 8.850000000000001
  episodes_this_iter: 78
  episodes_total: 1855
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6662444472312914e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.31232206688986885
          entropy_coeff: 0.009999999999999998
          kl: 0.009122718113462729
          policy_loss: -0.13734322438637417
          total_loss: -0.13409643570582072
          vf_explained_var: 0.9993990659713745
          vf_loss: 0.006369977661718925
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,235,32444.6,235000,9.91,9.94,8.85,13.01




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-24_05-11-08
  done: false
  episode_len_mean: 13.38
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.856799999999998
  episode_reward_min: 7.910000000000001
  episodes_this_iter: 75
  episodes_total: 1930
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6662444472312914e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.2889241811301973
          entropy_coeff: 0.009999999999999998
          kl: 0.0038120317020504915
          policy_loss: 0.04745578741033872
          total_loss: 0.12193427032066716
          vf_explained_var: 0.9932722449302673
          vf_loss: 0.07736771301262908
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 2360

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,236,33970.9,236000,9.8568,9.94,7.91,13.38




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-24_05-22-54
  done: false
  episode_len_mean: 12.92
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.8814
  episode_reward_min: 7.910000000000001
  episodes_this_iter: 34
  episodes_total: 1964
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8331222236156457e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.32384351028336417
          entropy_coeff: 0.009999999999999998
          kl: 0.016790976428133614
          policy_loss: -0.2042993684609731
          total_loss: -0.06500066758857834
          vf_explained_var: 0.8122290968894958
          vf_loss: 0.1425371047626767
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,237,34676.9,237000,9.8814,9.94,7.91,12.92




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-24_05-41-05
  done: false
  episode_len_mean: 21.64
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.8139
  episode_reward_min: -0.8899999999998761
  episodes_this_iter: 54
  episodes_total: 2018
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8331222236156457e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.38682499279578525
          entropy_coeff: 0.009999999999999998
          kl: 0.035703702594227456
          policy_loss: 0.14559035615788565
          total_loss: 0.20002881122959984
          vf_explained_var: 0.6456944942474365
          vf_loss: 0.05830664682305521
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,238,35768.6,238000,9.8139,9.94,-0.89,21.64




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-24_05-59-13
  done: false
  episode_len_mean: 12.89
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.9211
  episode_reward_min: 9.800000000000004
  episodes_this_iter: 53
  episodes_total: 2071
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7496833354234702e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.3849554643034935
          entropy_coeff: 0.009999999999999998
          kl: 0.008085812231639226
          policy_loss: -0.2638840701844957
          total_loss: -0.2175902574426598
          vf_explained_var: 0.9961357712745667
          vf_loss: 0.05014335031931599
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,239,36856.5,239000,9.9211,9.94,9.8,12.89




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-24_06-09-45
  done: false
  episode_len_mean: 22.04
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.760200000000001
  episode_reward_min: -0.6499999999999252
  episodes_this_iter: 31
  episodes_total: 2102
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7496833354234702e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9892290532588959
          entropy_coeff: 0.009999999999999998
          kl: 0.02493309127863144
          policy_loss: 0.1490683061381181
          total_loss: 0.41658329806394045
          vf_explained_var: 0.9398901462554932
          vf_loss: 0.2774072091612551
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,240,37488.7,240000,9.7602,9.94,-0.65,22.04




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-24_06-33-58
  done: false
  episode_len_mean: 15.99
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.8705
  episode_reward_min: 4.340000000000077
  episodes_this_iter: 71
  episodes_total: 2173
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.124525003135205e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.165877293712563
          entropy_coeff: 0.009999999999999998
          kl: 0.0014062915058711785
          policy_loss: -0.2633011824554867
          total_loss: -0.2536825991339154
          vf_explained_var: 0.998932421207428
          vf_loss: 0.011277352728777461
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,241,38940.8,241000,9.8705,9.94,4.34,15.99




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-24_06-58-37
  done: false
  episode_len_mean: 14.87
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.8124
  episode_reward_min: 0.7400000000000462
  episodes_this_iter: 72
  episodes_total: 2245
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0622625015676027e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.19991424481074016
          entropy_coeff: 0.009999999999999998
          kl: 0.006299904571207864
          policy_loss: 0.06429068769017855
          total_loss: 0.27508265111181474
          vf_explained_var: 0.9853004217147827
          vf_loss: 0.2127910885752903
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,242,40420,242000,9.8124,9.94,0.74,14.87




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-24_07-26-09
  done: false
  episode_len_mean: 12.38
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.9163
  episode_reward_min: 8.900000000000002
  episodes_this_iter: 81
  episodes_total: 2326
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0622625015676027e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.16564417249626584
          entropy_coeff: 0.009999999999999998
          kl: 0.0017003969687835684
          policy_loss: -0.004265369764632649
          total_loss: 0.02809107028361824
          vf_explained_var: 0.996944010257721
          vf_loss: 0.034012882018254864
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,243,42072.7,243000,9.9163,9.94,8.9,12.38




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-24_07-44-22
  done: false
  episode_len_mean: 15.81
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.832200000000002
  episode_reward_min: 0.43000000000005323
  episodes_this_iter: 53
  episodes_total: 2379
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0311312507838013e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.3514874868922763
          entropy_coeff: 0.009999999999999998
          kl: 0.005962281604859814
          policy_loss: -0.13915971351994408
          total_loss: -0.008072002728780111
          vf_explained_var: 0.9892051815986633
          vf_loss: 0.13460258150266277
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,244,43165.5,244000,9.8322,9.94,0.43,15.81




Result for PPO_my_env_aa44d_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-24_07-59-30
  done: false
  episode_len_mean: 15.86
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 9.821800000000001
  episode_reward_min: 0.43000000000005323
  episodes_this_iter: 43
  episodes_total: 2422
  experiment_id: d089d148212147fca860827e989d529c
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0311312507838013e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4883825722667906
          entropy_coeff: 0.009999999999999998
          kl: 0.010021544252595464
          policy_loss: -0.3225179077850448
          total_loss: -0.15349496122863557
          vf_explained_var: 0.9670743942260742
          vf_loss: 0.17390676839277147
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 24

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_aa44d_00000,RUNNING,192.168.3.5:343315,245,44072.8,245000,9.8218,9.94,0.43,15.86


Process _WandbLoggingProcess-1:
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/integration/wandb.py", line 200, in run
    result = self.queue.get()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/miniconda/en

KeyboardInterrupt: 