In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that flattens its final feature map.

    The network is a stack of six ``Conv2d`` layers (kernel 2, stride 2, no
    padding), each followed by ``ELU``, and a trailing ``Flatten``. Every
    convolution halves the spatial resolution, so a 64x64 RGB input ends up
    as a 512-dimensional vector per sample.

    The layers live in ``self.cnn`` as an ``nn.Sequential`` with convolutions
    at the even indices 0..10, which keeps parameter names such as
    ``cnn.0.weight`` stable for loading saved weights.
    """

    def __init__(self):
        super().__init__()

        # Channel count before and after each of the six convolutions.
        channel_plan = (3, 32, 32, 64, 128, 256, 512)

        layers = []
        for in_channels, out_channels in zip(channel_plan[:-1], channel_plan[1:]):
            layers.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0)
            )
            layers.append(nn.ELU())
        layers.append(nn.Flatten())

        self.cnn = nn.Sequential(*layers)

    def forward(self, x):
        """Encode a batch of channel-first images into flat feature vectors."""
        return self.cnn(x)

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model combining a pretrained visual encoder with a target-grid encoder.

    The observation is expected to be a dict with:
      * ``'pov'``: a channel-last image batch, scaled here from [0, 255] to [0, 1];
      * ``'target_grid'``: an integer-valued grid that is one-hot encoded into
        7 classes before being fed to a 1x1x1 ``Conv3d``.

    The visual encoder weights are loaded from disk and the encoder is run
    under ``torch.no_grad()``, so no gradients flow into it. The concatenated
    features go through an MLP shared by the action-logit head and the value
    head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build all sub-networks and load the pretrained visual encoder weights.

        Args:
            obs_space: Observation space handed over by RLlib.
            action_space: Discrete action space; ``action_space.n`` sets the
                number of action logits.
            num_outputs: Number of model outputs requested by RLlib.
            model_config: RLlib model config dict.
            name: Model name.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        visual_features_dim = 512          # flattened output size of VisualEncoder
        target_features_dim = 9 * 11 * 11  # one channel per cell of the target grid
        policy_hidden_dim = 256

        self.visual_encoder = VisualEncoder()
        # Weights are always loaded onto the CPU first; device placement happens below.
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )

        # Collapses the 7 one-hot block channels into a single channel per grid cell.
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )

        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, 1024),
            nn.ELU(),
            nn.Linear(1024, 512),
            nn.ELU(),
            nn.Linear(512, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate of the most recent forward() call, read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            # Moves every sub-network registered above to the GPU.
            self.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: RLlib input dict; ``input_dict['obs']`` must contain
                the ``'pov'`` and ``'target_grid'`` tensors.
            state: Recurrent state, returned unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(action_logits, state)``.
        """
        obs = input_dict['obs']

        # Tensor.cuda()/Tensor.to() return a new tensor rather than moving in
        # place, so the result must be kept. Using the parameters' device
        # (instead of assuming CUDA) keeps inputs and weights together
        # wherever the model has been placed.
        device = next(self.parameters()).device

        # (batch, H, W, C) -> (batch, C, H, W), scaled to [0, 1].
        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        # One-hot adds a trailing class axis; move it to the channel position.
        target = one_hot(obs['target_grid'].to(device).long(), num_classes=7)
        target = target.permute(0, 4, 1, 2, 3).float()

        # The pretrained encoder is used as a fixed feature extractor.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)

        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)

        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimates computed by the latest ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register the model under the name referenced by "custom_model" in the trainer config.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing the image, inventory, compass and (optionally) target grid.

    Args:
        env: Environment to wrap.
        include_target: When true, the observation space and every returned
            observation also contain a ``'target_grid'`` entry.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        # Remembered so observation() returns exactly the keys declared in the space.
        self._include_target = include_target

        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict declared by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` when present (and
        removed from ``info``); otherwise it falls back to the current task of
        the unwrapped environment. If ``info`` was supplied without a target
        grid, the situation is logged and, when the unwrapped env offers
        ``should_reset``, a reset is requested.

        Args:
            obs: Raw observation with ``'pov'``, ``'inventory'`` and ``'compass'`` entries.
            reward: Unused.
            done: Unused.
            info: Optional step info dict; mutated when it carries ``'target_grid'``.

        Returns:
            Dict with ``'pov'``, ``'inventory'``, ``'compass'`` and, only when
            ``include_target`` was set, ``'target_grid'``.
        """
        if info is not None:
            if 'target_grid' in info:
                target_grid = info['target_grid']
                del info['target_grid']
            else:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
                target_grid = self.env.unwrapped.tasks.current.target_grid
        else:
            target_grid = self.env.unwrapped.tasks.current.target_grid

        result = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Emitting an undeclared key would make the observation fall outside
        # the Dict space built in __init__.
        if self._include_target:
            result['target_grid'] = target_grid
        return result

In [6]:
import os
# Select the GPU visible to this process: order devices by PCI bus id and
# expose only device 0.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "0",
})

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that turns a zero reward into a small penalty of -0.01."""

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return ``-0.01`` when ``rew`` equals zero, otherwise ``rew`` unchanged."""
        return -0.01 if rew == 0 else rew
    
def env_creator(env_config):
    """Build the wrapped IGLU environment used for training.

    Args:
        env_config: Optional mapping of environment settings. Recognised keys:
            ``'max_steps'`` (default ``1000``) and ``'tasks'`` (iterable of
            task preset ids, default ``['C3', 'C17', 'C32']``). ``None`` or an
            empty mapping reproduces the defaults.

    Returns:
        The environment wrapped, in order, by ``VisualObservationWrapper``
        (with the target grid included), ``SelectAndPlace``,
        ``Discretization`` and ``RewardWrapper``.
    """
    settings = env_config or {}
    max_steps = settings.get('max_steps', 1000)
    task_presets = list(settings.get('tasks', ['C3', 'C17', 'C32']))

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_presets))
    # Wrapper order matters: each wrapper sees the output of the previous one.
    env = VisualObservationWrapper(env, include_target=True)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    env = RewardWrapper(env)
    return env

from ray.tune.registry import register_env
# Register the factory under the name referenced by "env" in the trainer config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [7]:
from ray.tune.integration.wandb import WandbLogger

# Weights & Biases run settings picked up by WandbLogger.
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO MultiTask (C3, C17, C32) pretrained (AngelaCNN) (3 noops after placement) r: -0.01"
}

# PPO trainer configuration; "gamma" is not overridden here.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": {
        # Custom model registered with ModelCatalog.
        "custom_model": "my_torch_model",
        # Extra kwargs to be passed to the model's constructor.
        "custom_model_config": {},
    },
    "logger_config": {
        "wandb": wandb_settings,
    },
}

analysis = tune.run(
    PPOTrainer,
    config=ppo_config,
    loggers=[WandbLogger],
    checkpoint_at_end=True,
)



Trial name,status,loc
PPO_my_env_f560f_00000,PENDING,


2021-10-26 21:09:08,341	INFO wandb.py:170 -- Already logged into W&B.
2021-10-26 21:09:08,426	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=206)[0m 2021-10-26 21:09:11,737	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=206)[0m 2021-10-26 21:09:11,737	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=206)[0m 2021-10-26 21:09:31,767	INFO trainable.py:109 -- Trainable.setup took 22.430 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-26_21-11-12
  done: false
  episode_len_mean: 410.5
  episode_media: {}
  episode_reward_max: -4.079999999999957
  episode_reward_mean: -4.104999999999957
  episode_reward_min: -4.129999999999956
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8817786587609184
          entropy_coeff: 0.009999999999999998
          kl: 0.006693930526574989
          policy_loss: -0.20149057110150656
          total_loss: -0.22687244100703133
          vf_explained_var: 0.011597350239753723
          vf_loss: 0.0020971302007738914
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,1,101.046,1000,-4.105,-4.08,-4.13,410.5


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-26_21-11-34
  done: false
  episode_len_mean: 409.5
  episode_media: {}
  episode_reward_max: -4.079999999999957
  episode_reward_mean: -7.7274999999999565
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8707015249464245
          entropy_coeff: 0.009999999999999998
          kl: 0.009151069443873519
          policy_loss: 0.03347935097085105
          total_loss: 0.2642831400036812
          vf_explained_var: 0.33199697732925415
          vf_loss: 0.2576805866219931
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,2,123.047,2000,-7.7275,-4.08,-14.56,409.5


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-26_21-12-01
  done: false
  episode_len_mean: 409.57142857142856
  episode_media: {}
  episode_reward_max: -4.019999999999959
  episode_reward_mean: -6.171428571428528
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 7
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8689043336444433
          entropy_coeff: 0.009999999999999998
          kl: 0.010045820838391043
          policy_loss: -0.018307800342639288
          total_loss: -0.03426090627908707
          vf_explained_var: 0.12687383592128754
          vf_loss: 0.010726768181969722
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,3,149.551,3000,-6.17143,-4.02,-14.56,409.571


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-26_21-12-29
  done: false
  episode_len_mean: 407.0
  episode_media: {}
  episode_reward_max: -4.009999999999959
  episode_reward_mean: -5.952222222222179
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8358463340335422
          entropy_coeff: 0.009999999999999998
          kl: 0.009348850852260782
          policy_loss: -0.043690372361905046
          total_loss: 0.2846730943562256
          vf_explained_var: 0.050481654703617096
          vf_loss: 0.3548521632121669
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,4,177.414,4000,-5.95222,-4.01,-14.56,407


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-26_21-12-52
  done: false
  episode_len_mean: 407.9166666666667
  episode_media: {}
  episode_reward_max: -3.9099999999999606
  episode_reward_mean: -5.490833333333291
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 12
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.843253983391656
          entropy_coeff: 0.009999999999999998
          kl: 0.007908187637346378
          policy_loss: 0.04381274075971709
          total_loss: 0.024656585769520865
          vf_explained_var: 0.1311345249414444
          vf_loss: 0.0076947473920881745
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,5,200.633,5000,-5.49083,-3.91,-14.56,407.917


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-26_21-13-12
  done: false
  episode_len_mean: 405.92857142857144
  episode_media: {}
  episode_reward_max: -3.9099999999999606
  episode_reward_mean: -5.2692857142856715
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 14
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8125938918855455
          entropy_coeff: 0.009999999999999998
          kl: 0.010553975512047604
          policy_loss: -0.07968706952200996
          total_loss: -0.09060552856988377
          vf_explained_var: 0.39417898654937744
          vf_loss: 0.015096682307517362
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,6,220.76,6000,-5.26929,-3.91,-14.56,405.929


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-26_21-13-37
  done: false
  episode_len_mean: 409.4117647058824
  episode_media: {}
  episode_reward_max: -3.9099999999999606
  episode_reward_mean: -5.090588235294075
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 17
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7999437544080945
          entropy_coeff: 0.009999999999999998
          kl: 0.009619589861244246
          policy_loss: 0.04422048297193315
          total_loss: 0.02630374473002222
          vf_explained_var: 0.528251588344574
          vf_loss: 0.008158782916143536
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 70

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,7,245.135,7000,-5.09059,-3.91,-14.56,409.412


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-26_21-14-00
  done: false
  episode_len_mean: 410.0
  episode_media: {}
  episode_reward_max: -3.9099999999999606
  episode_reward_mean: -4.991578947368378
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 19
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.808088692029317
          entropy_coeff: 0.009999999999999998
          kl: 0.010383728035554768
          policy_loss: 0.04414480510685179
          total_loss: 0.023917792903052435
          vf_explained_var: 0.5834179520606995
          vf_loss: 0.0057771309563476175
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,8,268.144,8000,-4.99158,-3.91,-14.56,410


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-26_21-14-25
  done: false
  episode_len_mean: 407.3636363636364
  episode_media: {}
  episode_reward_max: -3.789999999999963
  episode_reward_mean: -4.843636363636321
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 22
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7849916166729396
          entropy_coeff: 0.009999999999999998
          kl: 0.00995967397750062
          policy_loss: -0.01054296592871348
          total_loss: -0.02931419387459755
          vf_explained_var: 0.29957467317581177
          vf_loss: 0.007086752699170675
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,9,293.328,9000,-4.84364,-3.79,-14.56,407.364


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-26_21-14-46
  done: false
  episode_len_mean: 405.4166666666667
  episode_media: {}
  episode_reward_max: -3.7399999999999642
  episode_reward_mean: -4.759999999999958
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 24
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7531207111146716
          entropy_coeff: 0.009999999999999998
          kl: 0.012296335182541421
          policy_loss: 0.03669446926150057
          total_loss: 0.01581298170818223
          vf_explained_var: 0.8574175238609314
          vf_loss: 0.004190452605123735
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,10,314.614,10000,-4.76,-3.74,-14.56,405.417


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-26_21-15-07
  done: false
  episode_len_mean: 405.037037037037
  episode_media: {}
  episode_reward_max: -3.7399999999999642
  episode_reward_mean: -4.677777777777736
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 27
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.732754847738478
          entropy_coeff: 0.009999999999999998
          kl: 0.01045083620668485
          policy_loss: -0.08798544775280688
          total_loss: -0.10930989169412189
          vf_explained_var: 0.6850016713142395
          vf_loss: 0.003912937245331705
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,11,335.198,11000,-4.67778,-3.74,-14.56,405.037


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-26_21-15-28
  done: false
  episode_len_mean: 403.0689655172414
  episode_media: {}
  episode_reward_max: -3.6899999999999653
  episode_reward_mean: -4.6148275862068555
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 29
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6927959256702
          entropy_coeff: 0.009999999999999998
          kl: 0.01113482702274195
          policy_loss: -0.022642872027224963
          total_loss: -0.04421991888019774
          vf_explained_var: 0.8580195903778076
          vf_loss: 0.003123946528648958
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,12,356.893,12000,-4.61483,-3.69,-14.56,403.069




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-26_21-16-08
  done: false
  episode_len_mean: 400.53125
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.534687499999959
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 32
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6194247828589545
          entropy_coeff: 0.009999999999999998
          kl: 0.01353420948382252
          policy_loss: 0.05509546262522538
          total_loss: 0.03491605791366763
          vf_explained_var: 0.8602737188339233
          vf_loss: 0.0033080038582233504
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,13,396.692,13000,-4.53469,-3.18,-14.56,400.531


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-26_21-16-29
  done: false
  episode_len_mean: 400.0
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.498235294117606
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 34
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.607422473695543
          entropy_coeff: 0.009999999999999998
          kl: 0.010976618506545242
          policy_loss: -0.12882483957542312
          total_loss: -0.1504777034951581
          vf_explained_var: 0.9385809898376465
          vf_loss: 0.0022260394568244615
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,14,417.42,14000,-4.49824,-3.18,-14.56,400


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-26_21-16-49
  done: false
  episode_len_mean: 401.1621621621622
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.469459459459418
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 37
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6100649409823946
          entropy_coeff: 0.009999999999999998
          kl: 0.0071737179211148255
          policy_loss: 0.06902458874715699
          total_loss: 0.047182806871003576
          vf_explained_var: 0.92244553565979
          vf_loss: 0.002824123889634696
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,15,437.686,15000,-4.46946,-3.18,-14.56,401.162


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-26_21-17-09
  done: false
  episode_len_mean: 402.35897435897436
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.456410256410215
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 39
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.588376930024889
          entropy_coeff: 0.009999999999999998
          kl: 0.011768352402437178
          policy_loss: -0.004901071306731966
          total_loss: -0.02441776411400901
          vf_explained_var: 0.6484857201576233
          vf_loss: 0.004013406161943243
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,16,457.919,16000,-4.45641,-3.18,-14.56,402.359


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-26_21-17-30
  done: false
  episode_len_mean: 402.8809523809524
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.430714285714244
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 42
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5697849538591173
          entropy_coeff: 0.009999999999999998
          kl: 0.010722033289922944
          policy_loss: -0.022641359104050532
          total_loss: -0.04232432544231415
          vf_explained_var: 0.681905210018158
          vf_loss: 0.0038704751576814386
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,17,478.351,17000,-4.43071,-3.18,-14.56,402.881


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-26_21-17-51
  done: false
  episode_len_mean: 402.45454545454544
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.408181818181777
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 44
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5368358506096733
          entropy_coeff: 0.009999999999999998
          kl: 0.011425428353076623
          policy_loss: -0.01748035413523515
          total_loss: -0.036167510723074274
          vf_explained_var: 0.8530434370040894
          vf_loss: 0.004396113653719011
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,18,498.903,18000,-4.40818,-3.18,-14.56,402.455


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-26_21-18-13
  done: false
  episode_len_mean: 403.8936170212766
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.398085106382937
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 47
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5071538819207086
          entropy_coeff: 0.009999999999999998
          kl: 0.011984452638829666
          policy_loss: -0.08643786178694832
          total_loss: -0.10261799212959077
          vf_explained_var: 0.7404918074607849
          vf_loss: 0.006494516828873505
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,19,521.139,19000,-4.39809,-3.18,-14.56,403.894


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-26_21-18-35
  done: false
  episode_len_mean: 403.9795918367347
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.384285714285673
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 49
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5349192168977526
          entropy_coeff: 0.009999999999999998
          kl: 0.009693121081491856
          policy_loss: 0.1305569537811809
          total_loss: 0.112049091524548
          vf_explained_var: 0.7686625719070435
          vf_loss: 0.004902706458880048
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,20,543.056,20000,-4.38429,-3.18,-14.56,403.98


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-26_21-18-56
  done: false
  episode_len_mean: 403.88235294117646
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.369803921568585
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 51
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.455001409848531
          entropy_coeff: 0.009999999999999998
          kl: 0.010219689819946767
          policy_loss: -0.1382840986053149
          total_loss: -0.15338130046923956
          vf_explained_var: 0.7235661745071411
          vf_loss: 0.007408875611791801
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,21,564.549,21000,-4.3698,-3.18,-14.56,403.882


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-26_21-19-16
  done: false
  episode_len_mean: 404.75925925925924
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.360185185185143
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 54
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.507863407664829
          entropy_coeff: 0.009999999999999998
          kl: 0.010158073835006975
          policy_loss: 0.0876531040502919
          total_loss: 0.07150597034229172
          vf_explained_var: 0.7023659944534302
          vf_loss: 0.006899882913825826
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,22,584.021,22000,-4.36019,-3.18,-14.56,404.759


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-26_21-19-35
  done: false
  episode_len_mean: 405.2857142857143
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.354285714285672
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 56
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.490634650654263
          entropy_coeff: 0.009999999999999998
          kl: 0.011465433029377812
          policy_loss: -0.08033123678631253
          total_loss: -0.09488666405280431
          vf_explained_var: 0.4009546935558319
          vf_loss: 0.008057833380169339
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,23,603.201,23000,-4.35429,-3.18,-14.56,405.286


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-26_21-19-55
  done: false
  episode_len_mean: 405.47457627118644
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.340847457627077
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 59
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4814838409423827
          entropy_coeff: 0.009999999999999998
          kl: 0.011547361620291597
          policy_loss: 0.028688969877031116
          total_loss: 0.013346583147843679
          vf_explained_var: 0.6899649500846863
          vf_loss: 0.007162977817157904
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,24,623.443,24000,-4.34085,-3.18,-14.56,405.475




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-26_21-20-32
  done: false
  episode_len_mean: 405.0655737704918
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.327377049180286
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 61
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4540601518419054
          entropy_coeff: 0.009999999999999998
          kl: 0.010463422418651468
          policy_loss: -0.09640533692306942
          total_loss: -0.11134051101075279
          vf_explained_var: 0.7281672358512878
          vf_loss: 0.007512736780336126
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,25,660.22,25000,-4.32738,-3.18,-14.56,405.066


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-26_21-20-52
  done: false
  episode_len_mean: 405.234375
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.316093749999958
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 64
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.449362431632148
          entropy_coeff: 0.009999999999999998
          kl: 0.011162429000397604
          policy_loss: 0.046493505438168846
          total_loss: 0.03313438014851676
          vf_explained_var: 0.4939649701118469
          vf_loss: 0.008902009242835145
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,26,680.731,26000,-4.31609,-3.18,-14.56,405.234


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-26_21-21-11
  done: false
  episode_len_mean: 405.72727272727275
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.313030303030261
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 66
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4453113264507715
          entropy_coeff: 0.009999999999999998
          kl: 0.011278179984771855
          policy_loss: 0.0378076809975836
          total_loss: 0.02216170993116167
          vf_explained_var: 0.5464633703231812
          vf_loss: 0.00655150698504359
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,27,699.517,27000,-4.31303,-3.18,-14.56,405.727


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-26_21-21-28
  done: false
  episode_len_mean: 407.45588235294116
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.322794117647017
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 68
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4061367564731175
          entropy_coeff: 0.009999999999999998
          kl: 0.008857812622741731
          policy_loss: -0.044620553818013935
          total_loss: -0.057711984713872275
          vf_explained_var: 0.529389500617981
          vf_loss: 0.009198372737996074
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,28,716.516,28000,-4.32279,-3.18,-14.56,407.456


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-26_21-21-46
  done: false
  episode_len_mean: 408.95714285714286
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.330714285714243
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 70
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3726951175265842
          entropy_coeff: 0.009999999999999998
          kl: 0.012609927682484694
          policy_loss: -0.11540727615356446
          total_loss: -0.1269681258334054
          vf_explained_var: 0.4854773283004761
          vf_loss: 0.009644113252741388
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,29,733.904,29000,-4.33071,-3.18,-14.56,408.957


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-26_21-22-04
  done: false
  episode_len_mean: 410.1095890410959
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.332328767123244
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 73
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2964592271380955
          entropy_coeff: 0.009999999999999998
          kl: 0.010855079602665383
          policy_loss: 0.017105329202281104
          total_loss: 0.006037713587284088
          vf_explained_var: 0.3646973967552185
          vf_loss: 0.00972596295695338
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,30,752.49,30000,-4.33233,-3.18,-14.56,410.11


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-26_21-22-24
  done: false
  episode_len_mean: 411.29333333333335
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.3379999999999574
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 75
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3256317191653784
          entropy_coeff: 0.009999999999999998
          kl: 0.007388174027658619
          policy_loss: 0.12349114186233945
          total_loss: 0.10784915950563219
          vf_explained_var: 0.41333329677581787
          vf_loss: 0.006136698361014068
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,31,772.233,31000,-4.338,-3.18,-14.56,411.293


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-26_21-22-42
  done: false
  episode_len_mean: 413.0779220779221
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.349999999999956
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 77
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.261150670051575
          entropy_coeff: 0.009999999999999998
          kl: 0.01006776460103715
          policy_loss: 0.07695305777920616
          total_loss: 0.06271962457233005
          vf_explained_var: 0.06441392004489899
          vf_loss: 0.006364521906652954
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,32,789.853,32000,-4.35,-3.18,-14.56,413.078


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-26_21-23-00
  done: false
  episode_len_mean: 414.1392405063291
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.355063291139197
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 79
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.264736893441942
          entropy_coeff: 0.009999999999999998
          kl: 0.012135230853571294
          policy_loss: -0.06882172475258509
          total_loss: -0.07726484992437893
          vf_explained_var: 0.1795087456703186
          vf_loss: 0.01177719769378503
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,33,808.45,33000,-4.35506,-3.18,-14.56,414.139


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-26_21-23-17
  done: false
  episode_len_mean: 416.1604938271605
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.369999999999956
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 81
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.254399469163683
          entropy_coeff: 0.009999999999999998
          kl: 0.012372651527956751
          policy_loss: -0.06952888303332859
          total_loss: -0.07874153653780619
          vf_explained_var: 0.42594611644744873
          vf_loss: 0.010856807306926284
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,34,825.478,34000,-4.37,-3.18,-14.56,416.16


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-26_21-23-35
  done: false
  episode_len_mean: 417.03614457831327
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.373734939758991
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 83
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2174444437026977
          entropy_coeff: 0.009999999999999998
          kl: 0.009913606287042123
          policy_loss: -0.09640476240052118
          total_loss: -0.1046782288286421
          vf_explained_var: 0.2441050261259079
          vf_loss: 0.011918259938829579
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,35,842.873,35000,-4.37373,-3.18,-14.56,417.036


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-26_21-23-51
  done: false
  episode_len_mean: 418.8235294117647
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.3868235294117195
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 85
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2617368592156306
          entropy_coeff: 0.009999999999999998
          kl: 0.009648960027131186
          policy_loss: -0.1267077879773246
          total_loss: -0.13435414565934076
          vf_explained_var: 0.09904710948467255
          vf_loss: 0.013041222750002312
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,36,859.484,36000,-4.38682,-3.18,-14.56,418.824


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-26_21-24-08
  done: false
  episode_len_mean: 420.85057471264366
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.4025287356321385
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 87
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1656513690948485
          entropy_coeff: 0.009999999999999998
          kl: 0.013383234315499513
          policy_loss: -0.12386591484149297
          total_loss: -0.13113673908842935
          vf_explained_var: 0.08924365788698196
          vf_loss: 0.01170904511689312
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,37,876.473,37000,-4.40253,-3.18,-14.56,420.851


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-26_21-24-26
  done: false
  episode_len_mean: 422.5168539325843
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.414831460674112
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 89
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.135616827011108
          entropy_coeff: 0.009999999999999998
          kl: 0.00944943473695494
          policy_loss: -0.0952509940498405
          total_loss: -0.10649778693914413
          vf_explained_var: 0.3403535783290863
          vf_loss: 0.008219492066483427
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,38,893.545,38000,-4.41483,-3.18,-14.56,422.517




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-26_21-25-02
  done: false
  episode_len_mean: 423.67391304347825
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.420217391304303
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 3
  episodes_total: 92
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1636713266372682
          entropy_coeff: 0.009999999999999998
          kl: 0.009122606649651043
          policy_loss: 0.01673584704597791
          total_loss: 0.006040062093072467
          vf_explained_var: 0.45913106203079224
          vf_loss: 0.009116406776593066
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,39,929.527,39000,-4.42022,-3.18,-14.56,423.674


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-26_21-25-18
  done: false
  episode_len_mean: 424.97872340425533
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.429361702127614
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 94
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0966144296858045
          entropy_coeff: 0.009999999999999998
          kl: 0.007264819130787058
          policy_loss: 0.07487137168645859
          total_loss: 0.06162944883108139
          vf_explained_var: 0.6552992463111877
          vf_loss: 0.006271256596341522
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,40,946.43,40000,-4.42936,-3.18,-14.56,424.979


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-26_21-25-37
  done: false
  episode_len_mean: 425.5
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.430833333333287
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 96
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0332605242729187
          entropy_coeff: 0.009999999999999998
          kl: 0.010479525744100273
          policy_loss: 0.17163413957589202
          total_loss: 0.15615554911394913
          vf_explained_var: 0.6852244138717651
          vf_loss: 0.002758111325076445
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,41,964.575,41000,-4.43083,-3.18,-14.56,425.5


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-26_21-25-54
  done: false
  episode_len_mean: 426.7959183673469
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.440204081632606
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 98
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0302238676283095
          entropy_coeff: 0.009999999999999998
          kl: 0.0069497836353732785
          policy_loss: 0.08858601252237956
          total_loss: 0.07760318550798628
          vf_explained_var: -0.5178723931312561
          vf_loss: 0.007929457092864646
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,42,981.706,42000,-4.4402,-3.18,-14.56,426.796


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-26_21-26-10
  done: false
  episode_len_mean: 427.84
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.4471999999999525
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 100
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9759939604335361
          entropy_coeff: 0.009999999999999998
          kl: 0.010658796190410528
          policy_loss: 0.03126361560490396
          total_loss: 0.02174871787428856
          vf_explained_var: 0.20080798864364624
          vf_loss: 0.008113281553151965
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,43,998.365,43000,-4.4472,-3.18,-14.56,427.84


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-26_21-26-26
  done: false
  episode_len_mean: 429.63
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.4650999999999526
  episode_reward_min: -14.559999999999956
  episodes_this_iter: 2
  episodes_total: 102
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9524822592735291
          entropy_coeff: 0.009999999999999998
          kl: 0.0062282148513881416
          policy_loss: 0.046789585053920744
          total_loss: 0.037049928141964804
          vf_explained_var: 0.13637052476406097
          vf_loss: 0.008539520423316087
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 440

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,44,1014.28,44000,-4.4651,-3.18,-14.56,429.63


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-26_21-26-44
  done: false
  episode_len_mean: 430.95
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.332999999999952
  episode_reward_min: -6.359999999999964
  episodes_this_iter: 2
  episodes_total: 104
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0119881351788838
          entropy_coeff: 0.009999999999999998
          kl: 0.006873495412918097
          policy_loss: -0.07099093281560474
          total_loss: -0.07447099702225791
          vf_explained_var: 0.04591848701238632
          vf_loss: 0.015265121285887693
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,45,1031.86,45000,-4.333,-3.18,-6.36,430.95


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-26_21-27-01
  done: false
  episode_len_mean: 432.9
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.352499999999951
  episode_reward_min: -6.359999999999964
  episodes_this_iter: 2
  episodes_total: 106
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.012096893787384
          entropy_coeff: 0.009999999999999998
          kl: 0.008971980919088013
          policy_loss: -0.062303852786620456
          total_loss: -0.0662222935921616
          vf_explained_var: 0.15109394490718842
          vf_loss: 0.014408133588666614
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,46,1048.49,46000,-4.3525,-3.18,-6.36,432.9


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-26_21-27-18
  done: false
  episode_len_mean: 434.08
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.364299999999952
  episode_reward_min: -6.359999999999964
  episodes_this_iter: 2
  episodes_total: 108
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1348640627331203
          entropy_coeff: 0.009999999999999998
          kl: 0.007381682180717498
          policy_loss: -0.07624410208728578
          total_loss: -0.08530980240967538
          vf_explained_var: -0.00286556757055223
          vf_loss: 0.01080660254940287
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,47,1065.81,47000,-4.3643,-3.18,-6.36,434.08


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-26_21-27-36
  done: false
  episode_len_mean: 435.81
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.357499999999951
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 110
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0568699015511407
          entropy_coeff: 0.009999999999999998
          kl: 0.008429519884158113
          policy_loss: -0.06871056308348973
          total_loss: -0.07452944550249312
          vf_explained_var: 0.21179024875164032
          vf_loss: 0.01306391263885113
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,48,1083.31,48000,-4.3575,-3.18,-5.41,435.81


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-26_21-27-54
  done: false
  episode_len_mean: 436.95
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.368899999999951
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 112
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.20808531443278
          entropy_coeff: 0.009999999999999998
          kl: 0.008178822510010737
          policy_loss: -0.11479123598999447
          total_loss: -0.12503869326578246
          vf_explained_var: 0.5377601385116577
          vf_loss: 0.010197624437407488
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,49,1101.56,49000,-4.3689,-3.18,-5.41,436.95


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-26_21-28-12
  done: false
  episode_len_mean: 438.75
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.386899999999951
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 115
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.20339615477456
          entropy_coeff: 0.009999999999999998
          kl: 0.013158183892924709
          policy_loss: -0.015446053279770746
          total_loss: -0.0273199243677987
          vf_explained_var: 0.7018502950668335
          vf_loss: 0.0075284533707114555
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,50,1119.69,50000,-4.3869,-3.18,-5.41,438.75


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-26_21-28-32
  done: false
  episode_len_mean: 438.53
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.38469999999995
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 117
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.041082387500339
          entropy_coeff: 0.009999999999999998
          kl: 0.009450865188950056
          policy_loss: 0.0692576963454485
          total_loss: 0.05854002253876792
          vf_explained_var: 0.6582333445549011
          vf_loss: 0.00780297859520134
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,51,1139.59,51000,-4.3847,-3.18,-5.41,438.53


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-26_21-28-50
  done: false
  episode_len_mean: 438.99
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.389299999999951
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 119
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9794647494951885
          entropy_coeff: 0.009999999999999998
          kl: 0.010482709002301025
          policy_loss: -0.09127463284466002
          total_loss: -0.09952306292123264
          vf_explained_var: 0.6104941368103027
          vf_loss: 0.009449675769752098
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,52,1158.2,52000,-4.3893,-3.18,-5.41,438.99




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-26_21-29-25
  done: false
  episode_len_mean: 440.16
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.400999999999951
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 121
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9821309937371148
          entropy_coeff: 0.009999999999999998
          kl: 0.0066147436167107426
          policy_loss: -0.12968552907307943
          total_loss: -0.13759703172577753
          vf_explained_var: 0.6365483999252319
          vf_loss: 0.010586858942406251
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,53,1193.02,53000,-4.401,-3.18,-5.41,440.16


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-26_21-29-43
  done: false
  episode_len_mean: 442.15
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.42089999999995
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 124
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9603640423880684
          entropy_coeff: 0.009999999999999998
          kl: 0.006852227737694645
          policy_loss: 0.021181318163871764
          total_loss: 0.013153865933418274
          vf_explained_var: 0.6307980418205261
          vf_loss: 0.010205738344747159
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,54,1210.84,54000,-4.4209,-3.18,-5.41,442.15


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-26_21-30-02
  done: false
  episode_len_mean: 442.81
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.42749999999995
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 126
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7686497661802503
          entropy_coeff: 0.009999999999999998
          kl: 0.009833820077159711
          policy_loss: 0.07001753035518858
          total_loss: 0.06028615352180269
          vf_explained_var: 0.671104371547699
          vf_loss: 0.0059883524544097275
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,55,1229.33,55000,-4.4275,-3.18,-5.41,442.81


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-26_21-30-19
  done: false
  episode_len_mean: 444.63
  episode_media: {}
  episode_reward_max: -3.179999999999976
  episode_reward_mean: -4.445699999999949
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 128
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7658163415061103
          entropy_coeff: 0.009999999999999998
          kl: 0.01390005483931718
          policy_loss: -0.006005084349049462
          total_loss: -0.010460357864697773
          vf_explained_var: 0.3914368450641632
          vf_loss: 0.010422875977949136
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,56,1246.66,56000,-4.4457,-3.18,-5.41,444.63


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-26_21-30-37
  done: false
  episode_len_mean: 446.63
  episode_media: {}
  episode_reward_max: -3.6799999999999655
  episode_reward_mean: -4.465699999999949
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 130
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8062674972746107
          entropy_coeff: 0.009999999999999998
          kl: 0.010254856603281345
          policy_loss: -0.10252917947040664
          total_loss: -0.10879186491171519
          vf_explained_var: 0.6146659255027771
          vf_loss: 0.009749019357776787
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,57,1264.65,57000,-4.4657,-3.68,-5.41,446.63


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-26_21-30-56
  done: false
  episode_len_mean: 447.1
  episode_media: {}
  episode_reward_max: -3.6799999999999655
  episode_reward_mean: -4.470399999999949
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 133
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.843553822570377
          entropy_coeff: 0.009999999999999998
          kl: 0.007938309941479895
          policy_loss: 0.052460870312319864
          total_loss: 0.046676982939243314
          vf_explained_var: 0.4853614270687103
          vf_loss: 0.011063986170726518
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,58,1283.8,58000,-4.4704,-3.68,-5.41,447.1


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-26_21-31-15
  done: false
  episode_len_mean: 447.22
  episode_media: {}
  episode_reward_max: -3.6799999999999655
  episode_reward_mean: -4.471599999999949
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 135
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7300720559226141
          entropy_coeff: 0.009999999999999998
          kl: 0.009527904609263269
          policy_loss: -0.04597685717874103
          total_loss: -0.051005705694357556
          vf_explained_var: 0.1508496105670929
          vf_loss: 0.010366291606462456
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,59,1302.86,59000,-4.4716,-3.68,-5.41,447.22


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-26_21-31-35
  done: false
  episode_len_mean: 446.82
  episode_media: {}
  episode_reward_max: -3.6799999999999655
  episode_reward_mean: -4.46819999999995
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 138
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8225156678093803
          entropy_coeff: 0.009999999999999998
          kl: 0.0122492504059147
          policy_loss: 0.01615407872531149
          total_loss: 0.010866000751654307
          vf_explained_var: 0.2725166380405426
          vf_loss: 0.010487227847463348
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,60,1322.66,60000,-4.4682,-3.68,-5.41,446.82


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-26_21-31-56
  done: false
  episode_len_mean: 445.88
  episode_media: {}
  episode_reward_max: -3.5899999999999674
  episode_reward_mean: -4.4587999999999495
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 140
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7802946408589682
          entropy_coeff: 0.009999999999999998
          kl: 0.009370971061002544
          policy_loss: -0.10710880971617169
          total_loss: -0.1126515453060468
          vf_explained_var: 0.31177884340286255
          vf_loss: 0.010386017905289514
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,61,1343.57,61000,-4.4588,-3.59,-5.41,445.88


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-26_21-32-18
  done: false
  episode_len_mean: 444.34
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.443399999999949
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 143
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.762476544910007
          entropy_coeff: 0.009999999999999998
          kl: 0.006648992690264638
          policy_loss: -0.07242587159077327
          total_loss: -0.07690944116976527
          vf_explained_var: 0.4290089011192322
          vf_loss: 0.011811398311207692
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,62,1365.54,62000,-4.4434,-3.35,-5.41,444.34


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-26_21-32-40
  done: false
  episode_len_mean: 442.16
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.42159999999995
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 146
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8402175002627903
          entropy_coeff: 0.009999999999999998
          kl: 0.012371580915246403
          policy_loss: 0.03783352259132597
          total_loss: 0.031407994031906125
          vf_explained_var: 0.5410161018371582
          vf_loss: 0.009502326576815297
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,63,1387.46,63000,-4.4216,-3.35,-5.41,442.16


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-26_21-33-01
  done: false
  episode_len_mean: 441.01
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.41009999999995
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 149
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8923699259757996
          entropy_coeff: 0.009999999999999998
          kl: 0.005534391766837743
          policy_loss: 0.036619051463074155
          total_loss: 0.028495388726393383
          vf_explained_var: 0.5749627947807312
          vf_loss: 0.009693158788528914
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,64,1408.45,64000,-4.4101,-3.35,-5.41,441.01




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-26_21-33-39
  done: false
  episode_len_mean: 439.61
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.396099999999951
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 152
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8474612368477716
          entropy_coeff: 0.009999999999999998
          kl: 0.00710539944170032
          policy_loss: -0.00047149194611443413
          total_loss: -0.007925534082783594
          vf_explained_var: 0.5691792964935303
          vf_loss: 0.009599492772637556
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,65,1446.65,65000,-4.3961,-3.35,-5.41,439.61


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-26_21-34-01
  done: false
  episode_len_mean: 438.28
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.38279999999995
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 154
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8507510979970296
          entropy_coeff: 0.009999999999999998
          kl: 0.008851822530548108
          policy_loss: -0.09902804444233576
          total_loss: -0.1070359233352873
          vf_explained_var: 0.714235246181488
          vf_loss: 0.008729269199021575
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,66,1468.72,66000,-4.3828,-3.35,-5.41,438.28


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-26_21-34-23
  done: false
  episode_len_mean: 436.3
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.362999999999951
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 157
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8713811371061537
          entropy_coeff: 0.009999999999999998
          kl: 0.007097136614666017
          policy_loss: -0.09518406457371181
          total_loss: -0.0995167581571473
          vf_explained_var: 0.5549496412277222
          vf_loss: 0.012961685988638135
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,67,1490.25,67000,-4.363,-3.35,-5.41,436.3


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-26_21-34-45
  done: false
  episode_len_mean: 434.98
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.34979999999995
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 160
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8363312416606479
          entropy_coeff: 0.009999999999999998
          kl: 0.009237936790694552
          policy_loss: 0.06432487757669555
          total_loss: 0.05681094394789802
          vf_explained_var: 0.7156004905700684
          vf_loss: 0.009001788270608005
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,68,1511.91,68000,-4.3498,-3.35,-5.41,434.98


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-26_21-35-06
  done: false
  episode_len_mean: 433.21
  episode_media: {}
  episode_reward_max: -3.3499999999999726
  episode_reward_mean: -4.332099999999951
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 163
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7203373339441088
          entropy_coeff: 0.009999999999999998
          kl: 0.008189478509752313
          policy_loss: 0.0703718002471659
          total_loss: 0.0639035277068615
          vf_explained_var: 0.4515962302684784
          vf_loss: 0.009097204393603736
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,69,1533.8,69000,-4.3321,-3.35,-5.41,433.21


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-26_21-35-29
  done: false
  episode_len_mean: 431.02
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.310199999999952
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 166
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.661296718650394
          entropy_coeff: 0.009999999999999998
          kl: 0.007379668331528835
          policy_loss: 0.09374218020174238
          total_loss: 0.08677808824512694
          vf_explained_var: 0.6114711165428162
          vf_loss: 0.00817294276979131
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,70,1556.48,70000,-4.3102,-3.3,-5.41,431.02


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-26_21-35-53
  done: false
  episode_len_mean: 427.03
  episode_media: {}
  episode_reward_max: -3.2199999999999753
  episode_reward_mean: -4.270299999999953
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 169
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6293457905451456
          entropy_coeff: 0.009999999999999998
          kl: 0.004354721471694549
          policy_loss: 0.09049575626850129
          total_loss: 0.08432617973950174
          vf_explained_var: 0.5349217057228088
          vf_loss: 0.009252936612918145
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,71,1579.9,71000,-4.2703,-3.22,-5.41,427.03


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-26_21-36-15
  done: false
  episode_len_mean: 423.63
  episode_media: {}
  episode_reward_max: -3.2199999999999753
  episode_reward_mean: -4.236299999999954
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 172
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.671098158094618
          entropy_coeff: 0.009999999999999998
          kl: 0.007211106322282785
          policy_loss: 0.0702626496553421
          total_loss: 0.06482720954550637
          vf_explained_var: 0.48482686281204224
          vf_loss: 0.01055443132063374
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,72,1602.65,72000,-4.2363,-3.22,-5.41,423.63


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-26_21-36-37
  done: false
  episode_len_mean: 421.19
  episode_media: {}
  episode_reward_max: -3.2199999999999753
  episode_reward_mean: -4.211899999999954
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 175
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.691452243593004
          entropy_coeff: 0.009999999999999998
          kl: 0.008110434984915413
          policy_loss: 0.04038001596927643
          total_loss: 0.03286116868257523
          vf_explained_var: 0.4983292818069458
          vf_loss: 0.008584630594769906
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,73,1624.13,73000,-4.2119,-3.22,-5.41,421.19


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-26_21-36-59
  done: false
  episode_len_mean: 417.21
  episode_media: {}
  episode_reward_max: -3.1699999999999764
  episode_reward_mean: -4.172099999999955
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 178
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7065786507394578
          entropy_coeff: 0.009999999999999998
          kl: 0.006431352449793732
          policy_loss: 0.0033643109930886162
          total_loss: -0.0023439201215902966
          vf_explained_var: 0.46507126092910767
          vf_loss: 0.010714418510906399
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,74,1646.65,74000,-4.1721,-3.17,-5.41,417.21




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-26_21-37-41
  done: false
  episode_len_mean: 412.61
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.126099999999956
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 181
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6395066910319858
          entropy_coeff: 0.009999999999999998
          kl: 0.014735081032167823
          policy_loss: 0.005385351760519875
          total_loss: 0.0008498146302170224
          vf_explained_var: 0.5117055177688599
          vf_loss: 0.010386021898981804
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,75,1687.9,75000,-4.1261,-3.07,-5.41,412.61


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-26_21-38-04
  done: false
  episode_len_mean: 408.55
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.085499999999956
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 184
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.652253630426195
          entropy_coeff: 0.009999999999999998
          kl: 0.006133855740260764
          policy_loss: 0.007081737948788537
          total_loss: 0.0013834137055608962
          vf_explained_var: 0.5534420609474182
          vf_loss: 0.010210827644914388
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,76,1711.32,76000,-4.0855,-3.07,-5.41,408.55


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-26_21-38-27
  done: false
  episode_len_mean: 403.79
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.037899999999958
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 187
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.766915406121148
          entropy_coeff: 0.009999999999999998
          kl: 0.013797709221348795
          policy_loss: 0.0034705998169051277
          total_loss: -0.002381066315703922
          vf_explained_var: 0.48694202303886414
          vf_loss: 0.010437718560246544
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 7700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,77,1734.7,77000,-4.0379,-3.07,-5.41,403.79


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-26_21-38-52
  done: false
  episode_len_mean: 399.91
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.9990999999999586
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 190
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8193794237242804
          entropy_coeff: 0.009999999999999998
          kl: 0.017188576138266538
          policy_loss: 0.0020415061049991186
          total_loss: -0.004661820166640811
          vf_explained_var: 0.5623418688774109
          vf_loss: 0.009771610484717207
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,78,1758.77,78000,-3.9991,-3.07,-5.41,399.91


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-26_21-39-16
  done: false
  episode_len_mean: 395.33
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.9532999999999596
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 193
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8418777704238891
          entropy_coeff: 0.009999999999999998
          kl: 0.013883553774391672
          policy_loss: 0.023801643153031668
          total_loss: 0.015555709103743235
          vf_explained_var: 0.6619348526000977
          vf_loss: 0.008784491894766688
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,79,1782.82,79000,-3.9533,-3.07,-5.41,395.33


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-26_21-39-40
  done: false
  episode_len_mean: 391.34
  episode_media: {}
  episode_reward_max: -3.049999999999979
  episode_reward_mean: -3.9133999999999594
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 196
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0041314244270323
          entropy_coeff: 0.009999999999999998
          kl: 0.014398503332032912
          policy_loss: -0.01669880814022488
          total_loss: -0.02842439826991823
          vf_explained_var: 0.7394914627075195
          vf_loss: 0.006875870850894393
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,80,1806.85,80000,-3.9134,-3.05,-5.41,391.34


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-26_21-40-00
  done: false
  episode_len_mean: 389.3
  episode_media: {}
  episode_reward_max: -3.049999999999979
  episode_reward_mean: -3.8929999999999603
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 2
  episodes_total: 198
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2880812366803487
          entropy_coeff: 0.009999999999999998
          kl: 0.016335131629321837
          policy_loss: -0.06168805476691988
          total_loss: -0.07700790647003386
          vf_explained_var: 0.6165409088134766
          vf_loss: 0.005927448108074411
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,81,1827.58,81000,-3.893,-3.05,-5.41,389.3


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-26_21-40-23
  done: false
  episode_len_mean: 385.15
  episode_media: {}
  episode_reward_max: -3.049999999999979
  episode_reward_mean: -3.851499999999961
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 201
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9400030122862921
          entropy_coeff: 0.009999999999999998
          kl: 0.00988971363661943
          policy_loss: 0.02242550775408745
          total_loss: 0.009146460311280356
          vf_explained_var: 0.8797903060913086
          vf_loss: 0.0051320130419400005
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,82,1850.14,82000,-3.8515,-3.05,-5.41,385.15


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-26_21-40-48
  done: false
  episode_len_mean: 380.74
  episode_media: {}
  episode_reward_max: -3.049999999999979
  episode_reward_mean: -3.8073999999999626
  episode_reward_min: -5.409999999999929
  episodes_this_iter: 3
  episodes_total: 204
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7682000146971808
          entropy_coeff: 0.009999999999999998
          kl: 0.03167289332838325
          policy_loss: 0.04564874743421873
          total_loss: 0.03649738075004683
          vf_explained_var: 0.8937360048294067
          vf_loss: 0.005363342875433672
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,83,1874.87,83000,-3.8074,-3.05,-5.41,380.74


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-26_21-41-12
  done: false
  episode_len_mean: 375.77
  episode_media: {}
  episode_reward_max: -3.049999999999979
  episode_reward_mean: -3.757699999999964
  episode_reward_min: -5.209999999999933
  episodes_this_iter: 3
  episodes_total: 207
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.7788416531350877
          entropy_coeff: 0.009999999999999998
          kl: 0.008074912100920213
          policy_loss: 0.06775343120098114
          total_loss: 0.05634134262800217
          vf_explained_var: 0.8978763818740845
          vf_loss: 0.005165088304784149
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,84,1898.72,84000,-3.7577,-3.05,-5.21,375.77




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-26_21-41-54
  done: false
  episode_len_mean: 370.68
  episode_media: {}
  episode_reward_max: -3.049999999999979
  episode_reward_mean: -3.7067999999999652
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 210
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.7388608124521043
          entropy_coeff: 0.009999999999999998
          kl: 0.009180262836580634
          policy_loss: -0.10756052293711238
          total_loss: -0.11585457035236889
          vf_explained_var: 0.8316570520401001
          vf_loss: 0.00771752144727442
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,85,1940.93,85000,-3.7068,-3.05,-5.19,370.68


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-26_21-42-19
  done: false
  episode_len_mean: 364.39
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.643899999999966
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 4
  episodes_total: 214
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.626993915769789
          entropy_coeff: 0.009999999999999998
          kl: 0.008346419874965644
          policy_loss: -0.06275139815277524
          total_loss: -0.07085220714410147
          vf_explained_var: 0.8280659914016724
          vf_loss: 0.006917165178391669
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,86,1965.86,86000,-3.6439,-2.88,-5.19,364.39


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-26_21-42-43
  done: false
  episode_len_mean: 360.47
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.604699999999967
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 217
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.5788951873779298
          entropy_coeff: 0.009999999999999998
          kl: 0.018548554102873125
          policy_loss: 0.06868136417534616
          total_loss: 0.06159850913617346
          vf_explained_var: 0.8296195864677429
          vf_loss: 0.005923814710048545
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,87,1989.74,87000,-3.6047,-2.88,-5.19,360.47


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-26_21-43-05
  done: false
  episode_len_mean: 358.8
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.597599999999967
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 220
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.7634786155488755
          entropy_coeff: 0.009999999999999998
          kl: 0.018879931254739725
          policy_loss: -0.022022899902529185
          total_loss: -0.008015985207425223
          vf_explained_var: 0.6192309856414795
          vf_loss: 0.028809713204908702
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,88,2011.66,88000,-3.5976,-2.88,-5.19,358.8


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-26_21-43-28
  done: false
  episode_len_mean: 354.99
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.5594999999999675
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 223
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.7440049118465848
          entropy_coeff: 0.009999999999999998
          kl: 0.010101527819164652
          policy_loss: -0.030095893310176
          total_loss: -0.03881876137521532
          vf_explained_var: 0.7290900349617004
          vf_loss: 0.007201952205246521
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,89,2034.73,89000,-3.5595,-2.88,-5.19,354.99


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-26_21-43-51
  done: false
  episode_len_mean: 351.67
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.5262999999999693
  episode_reward_min: -5.189999999999934
  episodes_this_iter: 3
  episodes_total: 226
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.727266538143158
          entropy_coeff: 0.009999999999999998
          kl: 0.010966902334029546
          policy_loss: 0.004533941547075908
          total_loss: -0.004997111939721638
          vf_explained_var: 0.6062621474266052
          vf_loss: 0.006096576692976264
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,90,2058.28,90000,-3.5263,-2.88,-5.19,351.67


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-26_21-44-14
  done: false
  episode_len_mean: 347.74
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.48699999999997
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 229
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.8480321685473124
          entropy_coeff: 0.009999999999999998
          kl: 0.013535840348921389
          policy_loss: 0.0017371005482143826
          total_loss: -0.0028262033230728573
          vf_explained_var: 0.3054497539997101
          vf_loss: 0.011886638636416239
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,91,2081.33,91000,-3.487,-2.88,-5.1,347.74


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-26_21-44-38
  done: false
  episode_len_mean: 344.67
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.4562999999999704
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 232
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.6975093245506288
          entropy_coeff: 0.009999999999999998
          kl: 0.015695808084929572
          policy_loss: 0.015960905535353554
          total_loss: 0.011365241474575467
          vf_explained_var: 0.382171094417572
          vf_loss: 0.010025058528279058
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,92,2105.19,92000,-3.4563,-2.88,-5.1,344.67


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-26_21-45-02
  done: false
  episode_len_mean: 341.77
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.4272999999999705
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 235
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.6610255334112378
          entropy_coeff: 0.009999999999999998
          kl: 0.011126979825156012
          policy_loss: 0.022016379568311904
          total_loss: 0.017805620696809556
          vf_explained_var: 0.3916604816913605
          vf_loss: 0.010730446472169003
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,93,2128.99,93000,-3.4273,-2.88,-5.1,341.77


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-26_21-45-27
  done: false
  episode_len_mean: 338.97
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.3992999999999722
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 238
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.61178197728263
          entropy_coeff: 0.009999999999999998
          kl: 0.006354728071194415
          policy_loss: 0.057990608447127875
          total_loss: 0.052279917067951626
          vf_explained_var: 0.3685750365257263
          vf_loss: 0.009453917676324232
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,94,2154.23,94000,-3.3993,-2.88,-5.1,338.97




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-26_21-46-11
  done: false
  episode_len_mean: 336.53
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.374899999999972
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 241
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.5527109106381733
          entropy_coeff: 0.009999999999999998
          kl: 0.005367260616789295
          policy_loss: -0.0884154831369718
          total_loss: -0.09110803074306911
          vf_explained_var: 0.38739287853240967
          vf_loss: 0.01202947199344635
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,95,2198.03,95000,-3.3749,-2.55,-5.1,336.53


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-26_21-46-35
  done: false
  episode_len_mean: 335.07
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.360299999999973
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 4
  episodes_total: 245
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.616500907474094
          entropy_coeff: 0.009999999999999998
          kl: 0.007017140498810903
          policy_loss: -0.01198451088534461
          total_loss: -0.013249536355336508
          vf_explained_var: 0.3904201090335846
          vf_loss: 0.013847412810557418
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,96,2222.3,96000,-3.3603,-2.55,-5.1,335.07


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-26_21-47-00
  done: false
  episode_len_mean: 333.33
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.3428999999999722
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 248
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.581896612379286
          entropy_coeff: 0.009999999999999998
          kl: 0.00563919220510548
          policy_loss: 0.05140208937227726
          total_loss: 0.04621755186882284
          vf_explained_var: 0.47147148847579956
          vf_loss: 0.00978855558981498
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,97,2247.16,97000,-3.3429,-2.55,-5.1,333.33


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-26_21-47-26
  done: false
  episode_len_mean: 331.41
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.3236999999999726
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 251
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.4951997637748717
          entropy_coeff: 0.009999999999999998
          kl: 0.005034799584732013
          policy_loss: -0.08273711345261997
          total_loss: -0.08423720300197601
          vf_explained_var: 0.3778819739818573
          vf_loss: 0.012696687721957763
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,98,2273.07,98000,-3.3237,-2.55,-5.1,331.41


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-26_21-47-52
  done: false
  episode_len_mean: 329.4
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.303599999999973
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 4
  episodes_total: 255
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.631214393509759
          entropy_coeff: 0.009999999999999998
          kl: 0.007233439345741348
          policy_loss: -0.021321498519844478
          total_loss: -0.023052970899475944
          vf_explained_var: 0.3377542495727539
          vf_loss: 0.013495653681457043
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,99,2298.37,99000,-3.3036,-2.55,-5.1,329.4


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-26_21-48-16
  done: false
  episode_len_mean: 327.51
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.284699999999974
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 258
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.6583635846773783
          entropy_coeff: 0.009999999999999998
          kl: 0.033775696951842425
          policy_loss: 0.09576238161987728
          total_loss: 0.08782348740431997
          vf_explained_var: 0.8647862672805786
          vf_loss: 0.0035783873301827246
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,100,2322.46,100000,-3.2847,-2.55,-5.1,327.51


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-26_21-48-40
  done: false
  episode_len_mean: 327.06
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.2801999999999736
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 261
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7783105055491129
          entropy_coeff: 0.009999999999999998
          kl: 0.008209220919507803
          policy_loss: 0.09342933057083024
          total_loss: 0.08578120130631658
          vf_explained_var: 0.23975054919719696
          vf_loss: 0.008287903376751476
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,101,2346.28,101000,-3.2802,-2.55,-5.1,327.06


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-26_21-49-04
  done: false
  episode_len_mean: 325.79
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.267499999999975
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 264
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7222543703185187
          entropy_coeff: 0.009999999999999998
          kl: 0.0073548250486250724
          policy_loss: 0.007151043663422267
          total_loss: 0.0035448631478680504
          vf_explained_var: 0.2557804584503174
          vf_loss: 0.011961523799173947
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,102,2370.86,102000,-3.2675,-2.55,-5.1,325.79


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-26_21-49-30
  done: false
  episode_len_mean: 324.37
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.253299999999975
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 4
  episodes_total: 268
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6975873324606154
          entropy_coeff: 0.009999999999999998
          kl: 0.006554527311066494
          policy_loss: -0.010210533936818441
          total_loss: -0.01064164758556419
          vf_explained_var: 0.30479663610458374
          vf_loss: 0.01506998875281877
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,103,2396.88,103000,-3.2533,-2.55,-5.1,324.37




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-26_21-50-13
  done: false
  episode_len_mean: 322.83
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.2378999999999754
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 271
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6555253744125367
          entropy_coeff: 0.009999999999999998
          kl: 0.009602662969734076
          policy_loss: 0.0243962023821142
          total_loss: 0.019894849095079634
          vf_explained_var: 0.4246949553489685
          vf_loss: 0.009893300079016221
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 1040

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,104,2439.76,104000,-3.2379,-2.55,-5.1,322.83


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-26_21-50-40
  done: false
  episode_len_mean: 320.21
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.2116999999999756
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 4
  episodes_total: 275
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.582314627700382
          entropy_coeff: 0.009999999999999998
          kl: 0.006149150564390974
          policy_loss: -0.00463731764919228
          total_loss: -0.007306656655338075
          vf_explained_var: 0.49279987812042236
          vf_loss: 0.011770248273387551
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,105,2466.24,105000,-3.2117,-2.55,-5.1,320.21


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-26_21-51-06
  done: false
  episode_len_mean: 318.28
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.1923999999999757
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 278
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6211760838826497
          entropy_coeff: 0.009999999999999998
          kl: 0.005530203889691501
          policy_loss: 0.020400058726469675
          total_loss: 0.015561754504839579
          vf_explained_var: 0.48837241530418396
          vf_loss: 0.01012916285544634
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,106,2492.52,106000,-3.1924,-2.55,-5.1,318.28


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-26_21-51-32
  done: false
  episode_len_mean: 316.42
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.173799999999977
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 4
  episodes_total: 282
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5676064160135057
          entropy_coeff: 0.009999999999999998
          kl: 0.006246579472963829
          policy_loss: -0.013791896899541219
          total_loss: -0.014184821148713429
          vf_explained_var: 0.38940903544425964
          vf_loss: 0.013877660418964094
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,107,2519.07,107000,-3.1738,-2.55,-5.1,316.42


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-26_21-51-59
  done: false
  episode_len_mean: 314.87
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.158299999999977
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 285
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5533149070209926
          entropy_coeff: 0.009999999999999998
          kl: 0.006748259896695746
          policy_loss: 0.020197176519367428
          total_loss: 0.016826732125547198
          vf_explained_var: 0.43806254863739014
          vf_loss: 0.010644348171384384
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,108,2545.72,108000,-3.1583,-2.55,-5.1,314.87


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-26_21-52-26
  done: false
  episode_len_mean: 312.4
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.133599999999977
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 4
  episodes_total: 289
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6449105421702066
          entropy_coeff: 0.009999999999999998
          kl: 0.007634539353883775
          policy_loss: 0.02567159003681607
          total_loss: 0.02568986780113644
          vf_explained_var: 0.2999686896800995
          vf_loss: 0.014749611955549982
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,109,2572.42,109000,-3.1336,-2.55,-5.1,312.4


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-26_21-52-52
  done: false
  episode_len_mean: 311.07
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.1202999999999776
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 3
  episodes_total: 292
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5374998013178507
          entropy_coeff: 0.009999999999999998
          kl: 0.008800734492061273
          policy_loss: -0.05069270018074248
          total_loss: -0.052461830857727265
          vf_explained_var: 0.42842987179756165
          vf_loss: 0.01162570402957499
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,110,2598.73,110000,-3.1203,-2.55,-5.1,311.07


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-26_21-53-18
  done: false
  episode_len_mean: 309.48
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.1142999999999774
  episode_reward_min: -5.099999999999957
  episodes_this_iter: 4
  episodes_total: 296
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6666696151097615
          entropy_coeff: 0.009999999999999998
          kl: 0.004349723925397934
          policy_loss: -0.1889729464219676
          total_loss: -0.18266073366006216
          vf_explained_var: 0.42624837160110474
          vf_loss: 0.022000222084008984
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,111,2624.81,111000,-3.1143,-2.55,-5.1,309.48


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-26_21-53-42
  done: false
  episode_len_mean: 308.93
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.236599999999978
  episode_reward_min: -17.060000000000024
  episodes_this_iter: 2
  episodes_total: 298
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8528739425871108
          entropy_coeff: 0.009999999999999998
          kl: 0.026208268747704523
          policy_loss: -0.15642890754259295
          total_loss: 0.134833996825748
          vf_explained_var: 0.4211900532245636
          vf_loss: 0.30684321117069985
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 11200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,112,2648.11,112000,-3.2366,-2.55,-17.06,308.93




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-26_21-54-24
  done: false
  episode_len_mean: 307.27
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.4364999999999806
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 302
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8096281607945761
          entropy_coeff: 0.009999999999999998
          kl: 0.02161639593878218
          policy_loss: -0.09944464241464933
          total_loss: -0.0035984674054715367
          vf_explained_var: 0.278305321931839
          vf_loss: 0.11029469128180709
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,113,2690.71,113000,-3.4365,-2.41,-20.29,307.27


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-26_21-54-50
  done: false
  episode_len_mean: 306.91
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.51119999999998
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 305
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7342188212606642
          entropy_coeff: 0.009999999999999998
          kl: 0.013732054763886087
          policy_loss: -0.0043992525587479275
          total_loss: 0.037056337752276
          vf_explained_var: 0.44660699367523193
          vf_loss: 0.055321850596616665
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,114,2716.2,114000,-3.5112,-2.41,-20.29,306.91


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-26_21-55-16
  done: false
  episode_len_mean: 305.08
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.492899999999981
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 309
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6214400702052647
          entropy_coeff: 0.009999999999999998
          kl: 0.009527536628822691
          policy_loss: 0.03546161527434985
          total_loss: 0.032650427437490886
          vf_explained_var: 0.4756033718585968
          vf_loss: 0.010991556456105576
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,115,2742.68,115000,-3.4929,-2.41,-20.29,305.08


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-26_21-55-43
  done: false
  episode_len_mean: 304.44
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.486499999999981
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 312
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6366134537590875
          entropy_coeff: 0.009999999999999998
          kl: 0.006102626439539034
          policy_loss: -0.08681668407387204
          total_loss: -0.09153334457013342
          vf_explained_var: 0.36763089895248413
          vf_loss: 0.01010474396041698
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,116,2769.87,116000,-3.4865,-2.41,-20.29,304.44


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-26_21-56-10
  done: false
  episode_len_mean: 303.42
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.4762999999999806
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 316
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7713542858759561
          entropy_coeff: 0.009999999999999998
          kl: 0.008873285662890456
          policy_loss: 0.022573692517148122
          total_loss: 0.01670833287967576
          vf_explained_var: 0.47950461506843567
          vf_loss: 0.009602128996306823
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,117,2796.16,117000,-3.4763,-2.41,-20.29,303.42


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-26_21-56-37
  done: false
  episode_len_mean: 300.41
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.436599999999982
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 320
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.680215503109826
          entropy_coeff: 0.009999999999999998
          kl: 0.007444533376643846
          policy_loss: -0.011957900267508296
          total_loss: -0.01895484493838416
          vf_explained_var: 0.6198225021362305
          vf_loss: 0.00792081129944159
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,118,2823.39,118000,-3.4366,-2.41,-20.29,300.41


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-26_21-57-02
  done: false
  episode_len_mean: 298.86
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.4210999999999814
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 323
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.8554874857266743
          entropy_coeff: 0.009999999999999998
          kl: 0.010436467334686918
          policy_loss: -0.09372670981619093
          total_loss: -0.10629776178134812
          vf_explained_var: 0.8293814659118652
          vf_loss: 0.00334209315220101
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,119,2848.91,119000,-3.4211,-2.41,-20.29,298.86


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-26_21-57-29
  done: false
  episode_len_mean: 297.47
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.4071999999999822
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 326
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.741104335255093
          entropy_coeff: 0.009999999999999998
          kl: 0.005864329329521152
          policy_loss: -0.11152531521187889
          total_loss: -0.122040656208992
          vf_explained_var: 0.8574399352073669
          vf_loss: 0.005411293052343859
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,120,2875.48,120000,-3.4072,-2.41,-20.29,297.47




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-26_21-58-11
  done: false
  episode_len_mean: 295.38
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.386299999999983
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 330
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7496587687068514
          entropy_coeff: 0.009999999999999998
          kl: 0.0049601954150266655
          policy_loss: 0.052814696398046285
          total_loss: 0.04417626650796996
          vf_explained_var: 0.8755493760108948
          vf_loss: 0.007602606108412147
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,121,2917.24,121000,-3.3863,-2.41,-20.29,295.38


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-26_21-58-38
  done: false
  episode_len_mean: 294.34
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.375899999999983
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 333
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.63628830909729
          entropy_coeff: 0.009999999999999998
          kl: 0.00833749184627997
          policy_loss: -0.12838825715912713
          total_loss: -0.1367115347749657
          vf_explained_var: 0.7602048516273499
          vf_loss: 0.00698439315892756
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,122,2944.21,122000,-3.3759,-2.41,-20.29,294.34


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-26_21-59-04
  done: false
  episode_len_mean: 293.07
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.3631999999999835
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 337
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.636862822373708
          entropy_coeff: 0.009999999999999998
          kl: 0.015415753527477389
          policy_loss: 0.05441404071946939
          total_loss: 0.04337079752650526
          vf_explained_var: 0.917784571647644
          vf_loss: 0.00337432945250637
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,123,2970.03,123000,-3.3632,-2.41,-20.29,293.07


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-26_21-59-29
  done: false
  episode_len_mean: 293.29
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.3653999999999833
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 340
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1265625
          cur_lr: 5.000000000000001e-05
          entropy: 1.4925052258703444
          entropy_coeff: 0.009999999999999998
          kl: 0.033675724076296654
          policy_loss: -0.10125295942028363
          total_loss: -0.10020979882942306
          vf_explained_var: 0.6790928840637207
          vf_loss: 0.011706130755030447
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,124,2995.49,124000,-3.3654,-2.41,-20.29,293.29


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-26_21-59-53
  done: false
  episode_len_mean: 293.11
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.363599999999983
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 344
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6817943930625916
          entropy_coeff: 0.009999999999999998
          kl: 0.011752771282642686
          policy_loss: -0.05153635889291763
          total_loss: -0.05361464586522844
          vf_explained_var: 0.42639589309692383
          vf_loss: 0.012508466777702173
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,125,3019.67,125000,-3.3636,-2.41,-20.29,293.11


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-26_22-00-20
  done: false
  episode_len_mean: 291.9
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.3514999999999833
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 347
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3801237927542793
          entropy_coeff: 0.009999999999999998
          kl: 0.006995012194636748
          policy_loss: -0.028521980014112262
          total_loss: -0.03646469157603052
          vf_explained_var: 0.8210258483886719
          vf_loss: 0.004530567357627054
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,126,3045.93,126000,-3.3515,-2.41,-20.29,291.9


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-26_22-00-44
  done: false
  episode_len_mean: 292.01
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.352599999999984
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 350
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4917173345883687
          entropy_coeff: 0.009999999999999998
          kl: 0.00549509877564421
          policy_loss: -0.0692497486455573
          total_loss: -0.07648200069864591
          vf_explained_var: 0.7891548871994019
          vf_loss: 0.006641709165544145
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 12700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,127,3069.8,127000,-3.3526,-2.41,-20.29,292.01


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-26_22-01-09
  done: false
  episode_len_mean: 292.22
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.354499999999984
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 354
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4625033325619168
          entropy_coeff: 0.009999999999999998
          kl: 0.012809826387805145
          policy_loss: -0.09443538640108373
          total_loss: -0.0997295603983932
          vf_explained_var: 0.7844109535217285
          vf_loss: 0.006898991667872501
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 1280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,128,3095.36,128000,-3.3545,-2.41,-20.29,292.22


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-26_22-01-36
  done: false
  episode_len_mean: 291.2
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.3442999999999836
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 358
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1611292534404332
          entropy_coeff: 0.009999999999999998
          kl: 0.008637492703512824
          policy_loss: -0.03706767103738255
          total_loss: -0.042267886135313244
          vf_explained_var: 0.8796376585960388
          vf_loss: 0.004771301352108518
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,129,3122.18,129000,-3.3443,-2.41,-20.29,291.2




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-26_22-02-20
  done: false
  episode_len_mean: 289.32
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.3254999999999844
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 361
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.264870580037435
          entropy_coeff: 0.009999999999999998
          kl: 0.009020715044339194
          policy_loss: 0.005826147976848814
          total_loss: 0.0004135170744525062
          vf_explained_var: 0.8554562926292419
          vf_loss: 0.005523548237720712
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,130,3166.51,130000,-3.3255,-2.39,-20.29,289.32


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-26_22-02-47
  done: false
  episode_len_mean: 287.84
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.3106999999999847
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 365
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1588695049285889
          entropy_coeff: 0.009999999999999998
          kl: 0.0065588335543138344
          policy_loss: 0.06472476166155604
          total_loss: 0.060164642996258205
          vf_explained_var: 0.792820394039154
          vf_loss: 0.005783421566916837
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 1310

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,131,3192.9,131000,-3.3107,-2.39,-20.29,287.84


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-26_22-03-14
  done: false
  episode_len_mean: 286.97
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.3019999999999845
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 369
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.0983068227767945
          entropy_coeff: 0.009999999999999998
          kl: 0.0054657040370790985
          policy_loss: 0.020902348558108012
          total_loss: 0.01552495550778177
          vf_explained_var: 0.8592559099197388
          vf_loss: 0.004568043239932093
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,132,3219.96,132000,-3.302,-2.39,-20.29,286.97


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-26_22-03-40
  done: false
  episode_len_mean: 286.87
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.300999999999984
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 372
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1458942492802937
          entropy_coeff: 0.009999999999999998
          kl: 0.012472922561184024
          policy_loss: -0.01681630959113439
          total_loss: -0.022154872160818843
          vf_explained_var: 0.8573592901229858
          vf_loss: 0.0037524790360799268
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,133,3245.71,133000,-3.301,-2.39,-20.29,286.87


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-26_22-04-05
  done: false
  episode_len_mean: 287.16
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.3038999999999845
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 376
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1589768118328518
          entropy_coeff: 0.009999999999999998
          kl: 0.014492516422465289
          policy_loss: -0.059007013258006835
          total_loss: -0.06184819415211677
          vf_explained_var: 0.7900031208992004
          vf_loss: 0.005997276603658166
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,134,3270.68,134000,-3.3039,-2.39,-20.29,287.16


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-26_22-04-25
  done: false
  episode_len_mean: 289.81
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.3303999999999836
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 2
  episodes_total: 378
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5264506141344707
          entropy_coeff: 0.009999999999999998
          kl: 0.02596486716891248
          policy_loss: 0.10714198268122144
          total_loss: 0.10389681690269047
          vf_explained_var: 0.7836861610412598
          vf_loss: 0.0070900725640563505
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 13500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,135,3290.65,135000,-3.3304,-2.39,-20.29,289.81


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-26_22-04-48
  done: false
  episode_len_mean: 290.19
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.334199999999983
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 381
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4008844918674892
          entropy_coeff: 0.009999999999999998
          kl: 0.008627027649374005
          policy_loss: 0.07326915773252646
          total_loss: 0.06981124766170979
          vf_explained_var: 0.6873680353164673
          vf_loss: 0.008094252614925305
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,136,3314.44,136000,-3.3342,-2.39,-20.29,290.19


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-26_22-05-14
  done: false
  episode_len_mean: 290.64
  episode_media: {}
  episode_reward_max: -1.3699999999999914
  episode_reward_mean: -3.3183999999999845
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 4
  episodes_total: 385
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2663840002483793
          entropy_coeff: 0.009999999999999998
          kl: 0.011740849843215094
          policy_loss: -0.009642419384585486
          total_loss: -0.013744571059942246
          vf_explained_var: 0.8216759562492371
          vf_loss: 0.0052183007795570625
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,137,3340.2,137000,-3.3184,-1.37,-20.29,290.64


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-26_22-05-36
  done: false
  episode_len_mean: 292.76
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.2991999999999835
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 388
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7075225684377882
          entropy_coeff: 0.009999999999999998
          kl: 0.02401730621124813
          policy_loss: -0.053478807873196074
          total_loss: 0.031384302924076714
          vf_explained_var: -0.02827630005776882
          vf_loss: 0.09509903024364677
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,138,3361.6,138000,-3.2992,0.47,-20.29,292.76




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-26_22-06-15
  done: false
  episode_len_mean: 294.51
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.2963999999999833
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 391
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.6468330714437696
          entropy_coeff: 0.009999999999999998
          kl: 0.005460101568065549
          policy_loss: 0.07044509289165338
          total_loss: 0.06749338329666191
          vf_explained_var: 0.7420144081115723
          vf_loss: 0.011184346704329882
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,139,3401.07,139000,-3.2964,0.47,-20.29,294.51


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-26_22-06-35
  done: false
  episode_len_mean: 297.38
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.352699999999983
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 2
  episodes_total: 393
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.6961319247881572
          entropy_coeff: 0.009999999999999998
          kl: 0.015788029296400413
          policy_loss: -0.0029411191741625466
          total_loss: 0.1567590273088879
          vf_explained_var: 0.1589200794696808
          vf_loss: 0.169917633684559
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,140,3420.93,140000,-3.3527,0.47,-20.29,297.38


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-26_22-06-53
  done: false
  episode_len_mean: 300.16
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.370599999999983
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 2
  episodes_total: 395
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.7719878686798944
          entropy_coeff: 0.009999999999999998
          kl: 0.007947280785936093
          policy_loss: -0.15494706109166145
          total_loss: -0.1443476660384072
          vf_explained_var: 0.21035054326057434
          vf_loss: 0.024924604625751575
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,141,3439,141000,-3.3706,0.47,-20.29,300.16


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-26_22-07-12
  done: false
  episode_len_mean: 301.37
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.264599999999982
  episode_reward_min: -20.29000000000017
  episodes_this_iter: 3
  episodes_total: 398
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8287329024738737
          entropy_coeff: 0.009999999999999998
          kl: 0.009301853591465913
          policy_loss: 0.003509977294339074
          total_loss: 0.011353949705759684
          vf_explained_var: 0.4773760139942169
          vf_loss: 0.02215802982553012
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,142,3458.39,142000,-3.2646,0.47,-20.29,301.37


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-26_22-07-31
  done: false
  episode_len_mean: 304.15
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.1449999999999796
  episode_reward_min: -11.549999999999937
  episodes_this_iter: 2
  episodes_total: 400
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8273127992947897
          entropy_coeff: 0.009999999999999998
          kl: 0.007110824947389361
          policy_loss: -0.14596704178386263
          total_loss: -0.14156670164730814
          vf_explained_var: 0.13330630958080292
          vf_loss: 0.019636089406493636
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,143,3477.35,143000,-3.145,0.47,-11.55,304.15


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-26_22-07-52
  done: false
  episode_len_mean: 305.18
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.04729999999998
  episode_reward_min: -6.609999999999933
  episodes_this_iter: 3
  episodes_total: 403
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8895086208979288
          entropy_coeff: 0.009999999999999998
          kl: 0.014240508838818715
          policy_loss: -0.12555986568331717
          total_loss: -0.12935439894596737
          vf_explained_var: 0.8563286066055298
          vf_loss: 0.009017739341490798
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,144,3498.28,144000,-3.0473,0.47,-6.61,305.18


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-26_22-08-12
  done: false
  episode_len_mean: 307.97
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.152699999999979
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 406
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.004519049326579
          entropy_coeff: 0.009999999999999998
          kl: 0.013435069909406986
          policy_loss: 0.0677464731865459
          total_loss: 0.18696191252933608
          vf_explained_var: 0.2009071409702301
          vf_loss: 0.13352186613612704
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,145,3518.24,145000,-3.1527,0.47,-11.81,307.97


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-26_22-08-32
  done: false
  episode_len_mean: 310.47
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.1774999999999785
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 408
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9313336703512403
          entropy_coeff: 0.009999999999999998
          kl: 0.010515472270533266
          policy_loss: -0.13864523205492232
          total_loss: -0.13116411401165856
          vf_explained_var: 0.7621058821678162
          vf_loss: 0.02230279156162093
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,146,3538.28,146000,-3.1775,0.47,-11.81,310.47


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-26_22-08-54
  done: false
  episode_len_mean: 312.23
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.195099999999978
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 411
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9577445891168384
          entropy_coeff: 0.009999999999999998
          kl: 0.007491656985137377
          policy_loss: -0.07880350210600429
          total_loss: -0.07033848547273212
          vf_explained_var: 0.8238376975059509
          vf_loss: 0.024842411362462572
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,147,3559.69,147000,-3.1951,0.47,-11.81,312.23


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-26_22-09-15
  done: false
  episode_len_mean: 314.67
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.207699999999978
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 414
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9499300109015572
          entropy_coeff: 0.009999999999999998
          kl: 0.01307893923618898
          policy_loss: -0.02145984884765413
          total_loss: 0.11703795327080621
          vf_explained_var: 0.2886740267276764
          vf_loss: 0.15241045447376866
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,148,3580.92,148000,-3.2077,0.47,-11.81,314.67


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-26_22-09-37
  done: false
  episode_len_mean: 316.89
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.229499999999977
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 417
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.992124123043484
          entropy_coeff: 0.009999999999999998
          kl: 0.011304258560680106
          policy_loss: -0.051667586093147595
          total_loss: -0.020162198754648367
          vf_explained_var: 0.6225396394729614
          vf_loss: 0.0465980330740826
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,149,3602.38,149000,-3.2295,0.47,-11.81,316.89




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-26_22-10-14
  done: false
  episode_len_mean: 319.71
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.2574999999999767
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 420
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.105990489323934
          entropy_coeff: 0.009999999999999998
          kl: 0.011965464386092901
          policy_loss: -0.009267827620108922
          total_loss: -0.009357398996750513
          vf_explained_var: 0.7719511389732361
          vf_loss: 0.01585930545762595
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,150,3640.03,150000,-3.2575,0.47,-11.81,319.71


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-26_22-10-36
  done: false
  episode_len_mean: 320.92
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.269599999999976
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 422
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.1331271118587916
          entropy_coeff: 0.009999999999999998
          kl: 0.005507875096301085
          policy_loss: 0.058241678391479784
          total_loss: 0.046624735059837503
          vf_explained_var: 0.8944633603096008
          vf_loss: 0.0073616467700857256
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,151,3661.71,151000,-3.2696,0.47,-11.81,320.92


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-26_22-10-58
  done: false
  episode_len_mean: 322.87
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.268699999999976
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 425
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.0805513064066568
          entropy_coeff: 0.009999999999999998
          kl: 0.00887743822252247
          policy_loss: 0.027410916942689153
          total_loss: 0.017052886759241423
          vf_explained_var: 0.8392413854598999
          vf_loss: 0.006655497409196363
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,152,3683.56,152000,-3.2687,0.47,-11.81,322.87


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-26_22-11-19
  done: false
  episode_len_mean: 324.39
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.2833999999999754
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 428
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.122558131482866
          entropy_coeff: 0.009999999999999998
          kl: 0.01179428720229357
          policy_loss: -0.011346858574284448
          total_loss: 0.018919939837521976
          vf_explained_var: 0.47791579365730286
          vf_loss: 0.04645447197318491
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,153,3704.9,153000,-3.2834,0.47,-11.81,324.39


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-26_22-11-39
  done: false
  episode_len_mean: 327.37
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.3019999999999747
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 431
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.2450922436184353
          entropy_coeff: 0.009999999999999998
          kl: 0.024244094058563922
          policy_loss: -0.0067287792762120565
          total_loss: 0.03611660272710853
          vf_explained_var: 0.5561051964759827
          vf_loss: 0.05494047702652299
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,154,3724.81,154000,-3.302,0.47,-11.81,327.37


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-26_22-12-01
  done: false
  episode_len_mean: 329.3
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.321299999999975
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 434
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2172328816519844
          entropy_coeff: 0.009999999999999998
          kl: 0.008646088709338586
          policy_loss: -0.03595864673455556
          total_loss: -0.04804618838760588
          vf_explained_var: 0.7718023657798767
          vf_loss: 0.004545040466150062
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,155,3746.35,155000,-3.3213,0.47,-11.81,329.3


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-26_22-12-21
  done: false
  episode_len_mean: 331.18
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.3195999999999746
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 436
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.224218805631002
          entropy_coeff: 0.009999999999999998
          kl: 0.009127578959070308
          policy_loss: -0.13354377816948626
          total_loss: -0.10740843245552646
          vf_explained_var: 0.5513020753860474
          vf_loss: 0.04252928693571852
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,156,3766.93,156000,-3.3196,0.47,-11.81,331.18


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-26_22-12-41
  done: false
  episode_len_mean: 334.14
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.387499999999974
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 439
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.250421979692247
          entropy_coeff: 0.009999999999999998
          kl: 0.008698979320487549
          policy_loss: 0.09661971082290013
          total_loss: 0.16042745278941262
          vf_explained_var: 0.32823580503463745
          vf_loss: 0.08073832900780771
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 15700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,157,3786.68,157000,-3.3875,0.47,-11.81,334.14


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-26_22-13-01
  done: false
  episode_len_mean: 336.28
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.3984999999999737
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 442
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.300723444090949
          entropy_coeff: 0.009999999999999998
          kl: 0.008363813882318025
          policy_loss: -0.047031883978181414
          total_loss: 0.02425921360651652
          vf_explained_var: 0.34326526522636414
          vf_loss: 0.08893944753540886
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,158,3807.06,158000,-3.3985,0.47,-11.81,336.28


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-26_22-13-21
  done: false
  episode_len_mean: 337.87
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.4141999999999735
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 444
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.260968404346042
          entropy_coeff: 0.009999999999999998
          kl: 0.012382988359573252
          policy_loss: -0.09254080951213836
          total_loss: -0.08948864829209116
          vf_explained_var: 0.5106104612350464
          vf_loss: 0.017727783932867978
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,159,3826.45,159000,-3.4142,0.47,-11.81,337.87


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-26_22-13-41
  done: false
  episode_len_mean: 340.98
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.4450999999999734
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 447
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.31755743821462
          entropy_coeff: 0.009999999999999998
          kl: 0.007063835107683462
          policy_loss: -0.020313807825247446
          total_loss: -0.008380269590351317
          vf_explained_var: 0.4026198387145996
          vf_loss: 0.030583150257977348
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,160,3846.35,160000,-3.4451,0.47,-11.81,340.98




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-26_22-14-18
  done: false
  episode_len_mean: 343.38
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.468899999999972
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 450
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 2.2436168511708576
          entropy_coeff: 0.009999999999999998
          kl: 0.004530881777275833
          policy_loss: 0.005971306727992164
          total_loss: -0.005488702240917418
          vf_explained_var: 0.5266739726066589
          vf_loss: 0.008073120437458985
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,161,3883.28,161000,-3.4689,0.47,-11.81,343.38


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-26_22-14-37
  done: false
  episode_len_mean: 344.56
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.480499999999972
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 452
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.2443522029452856
          entropy_coeff: 0.009999999999999998
          kl: 0.009288977819974672
          policy_loss: -0.017290725600388315
          total_loss: -0.0031381814637117915
          vf_explained_var: 0.4549788236618042
          vf_loss: 0.03362023709859285
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,162,3902.73,162000,-3.4805,0.47,-11.81,344.56


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-26_22-14-57
  done: false
  episode_len_mean: 348.21
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.516799999999971
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 455
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.1863873375786675
          entropy_coeff: 0.009999999999999998
          kl: 0.009686057964904727
          policy_loss: 0.015573078269759814
          total_loss: 0.006245118172632323
          vf_explained_var: 0.3979330360889435
          vf_loss: 0.009432873031538393
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,163,3922.24,163000,-3.5168,0.47,-11.81,348.21


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-26_22-15-16
  done: false
  episode_len_mean: 350.45
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.548899999999971
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 457
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.188035672240787
          entropy_coeff: 0.009999999999999998
          kl: 0.012778331139882207
          policy_loss: -0.0019674893882539535
          total_loss: 0.0334624310127563
          vf_explained_var: 0.22866210341453552
          vf_loss: 0.05321659750512077
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,164,3941.73,164000,-3.5489,0.47,-11.81,350.45


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-26_22-15-36
  done: false
  episode_len_mean: 353.8
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.58219999999997
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 460
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.2420010487238566
          entropy_coeff: 0.009999999999999998
          kl: 0.010919387188226597
          policy_loss: 0.011274766466683812
          total_loss: 0.008052314445376397
          vf_explained_var: 0.2987886965274811
          vf_loss: 0.01569940633037024
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 1650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,165,3961.41,165000,-3.5822,0.47,-11.81,353.8


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-26_22-15-56
  done: false
  episode_len_mean: 356.3
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.626499999999969
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 462
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.2638236840565997
          entropy_coeff: 0.009999999999999998
          kl: 0.010336334454159744
          policy_loss: -0.04158201528092225
          total_loss: 0.001509910925394959
          vf_explained_var: -0.008582349866628647
          vf_loss: 0.06241880040615797
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,166,3981.56,166000,-3.6265,0.47,-11.81,356.3


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-26_22-16-15
  done: false
  episode_len_mean: 360.11
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.6835999999999682
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 465
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.2301726659138996
          entropy_coeff: 0.009999999999999998
          kl: 0.00963376037606948
          policy_loss: -0.0779984021352397
          total_loss: -0.06897061864535013
          vf_explained_var: 0.25904226303100586
          vf_loss: 0.02824322597589344
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,167,4000.61,167000,-3.6836,0.47,-11.81,360.11


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-26_22-16-35
  done: false
  episode_len_mean: 362.58
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.677799999999968
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 467
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.1989227506849502
          entropy_coeff: 0.009999999999999998
          kl: 0.014607125438644298
          policy_loss: -0.08854588052878777
          total_loss: -0.0108669132201208
          vf_explained_var: 0.048544999212026596
          vf_loss: 0.09498863479950362
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,168,4020.36,168000,-3.6778,0.47,-11.81,362.58


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-26_22-16-55
  done: false
  episode_len_mean: 365.88
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.7102999999999673
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 470
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.2544280025694103
          entropy_coeff: 0.009999999999999998
          kl: 0.01524596727456622
          policy_loss: -0.002698773228459888
          total_loss: -0.0074460671179824404
          vf_explained_var: 0.18451470136642456
          vf_loss: 0.012912764167413115
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,169,4040.41,169000,-3.7103,0.47,-11.81,365.88


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-26_22-17-15
  done: false
  episode_len_mean: 369.05
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.7603999999999678
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 473
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.261880456076728
          entropy_coeff: 0.009999999999999998
          kl: 0.006894015268936416
          policy_loss: 0.04995582335525089
          total_loss: 0.07086912892344925
          vf_explained_var: 0.22745203971862793
          vf_loss: 0.04132353206061655
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,170,4060.64,170000,-3.7604,0.47,-11.81,369.05


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-26_22-17-35
  done: false
  episode_len_mean: 370.49
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.774799999999967
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 475
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.31441261238522
          entropy_coeff: 0.009999999999999998
          kl: 0.0067483117799690285
          policy_loss: -0.1392247050586674
          total_loss: -0.14867750818116798
          vf_explained_var: 0.5161355137825012
          vf_loss: 0.011529424385581579
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,171,4080.41,171000,-3.7748,0.47,-11.81,370.49


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-26_22-17-55
  done: false
  episode_len_mean: 371.47
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.7935999999999668
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 478
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.3719915575451322
          entropy_coeff: 0.009999999999999998
          kl: 0.014526689192387466
          policy_loss: 0.0036306160191694895
          total_loss: -0.004525612791379293
          vf_explained_var: 0.6538330316543579
          vf_loss: 0.01090989682254278
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,172,4099.86,172000,-3.7936,0.47,-11.81,371.47




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-26_22-18-34
  done: false
  episode_len_mean: 373.09
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.8097999999999668
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 481
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.279130803214179
          entropy_coeff: 0.009999999999999998
          kl: 0.012036955104003037
          policy_loss: 0.0012262609269883898
          total_loss: -0.005972775154643588
          vf_explained_var: 0.4222016930580139
          vf_loss: 0.011736093427882427
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,173,4139.62,173000,-3.8098,0.47,-11.81,373.09


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-26_22-18-55
  done: false
  episode_len_mean: 374.65
  episode_media: {}
  episode_reward_max: 0.46999999999999975
  episode_reward_mean: -3.8340999999999665
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 483
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.290000091658698
          entropy_coeff: 0.009999999999999998
          kl: 0.014206786609733473
          policy_loss: -0.07385883050867253
          total_loss: 0.08670181946622
          vf_explained_var: 0.16255991160869598
          vf_loss: 0.17890934972609912
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 17400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,174,4159.85,174000,-3.8341,0.47,-11.81,374.65


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-26_22-19-14
  done: false
  episode_len_mean: 377.34
  episode_media: {}
  episode_reward_max: -0.8499999999999929
  episode_reward_mean: -3.8812999999999653
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 486
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.4096783743964303
          entropy_coeff: 0.009999999999999998
          kl: 0.015681376014350965
          policy_loss: -0.0019990531934632196
          total_loss: 0.011109435392750635
          vf_explained_var: 0.17970234155654907
          vf_loss: 0.0321815675124526
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,175,4179.54,175000,-3.8813,-0.85,-11.81,377.34


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-26_22-19-36
  done: false
  episode_len_mean: 377.45
  episode_media: {}
  episode_reward_max: -0.8499999999999929
  episode_reward_mean: -3.8721999999999652
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 488
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.3334773010677763
          entropy_coeff: 0.009999999999999998
          kl: 0.01242059922487518
          policy_loss: -0.0895013684199916
          total_loss: -0.0891623714317878
          vf_explained_var: 0.7985448837280273
          vf_loss: 0.01969468700699508
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 1760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,176,4200.91,176000,-3.8722,-0.85,-11.81,377.45


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-26_22-19-57
  done: false
  episode_len_mean: 378.18
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -3.8584999999999643
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 491
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.2272895680533513
          entropy_coeff: 0.009999999999999998
          kl: 0.04539407430571324
          policy_loss: -0.15747499089274142
          total_loss: 0.01940955865300364
          vf_explained_var: 0.6163669228553772
          vf_loss: 0.184614937286824
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 17700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,177,4221.93,177000,-3.8585,0.39,-11.81,378.18


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-26_22-20-18
  done: false
  episode_len_mean: 376.6
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -3.8828999999999643
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 494
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3329594850540163
          entropy_coeff: 0.009999999999999998
          kl: 0.01217493513649502
          policy_loss: -0.03504777948061625
          total_loss: 0.013175926854213078
          vf_explained_var: 0.5372480154037476
          vf_loss: 0.06570273438572055
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 1780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,178,4243.53,178000,-3.8829,0.39,-11.81,376.6


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-26_22-20-39
  done: false
  episode_len_mean: 375.21
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -3.886899999999965
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 497
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3067545784844294
          entropy_coeff: 0.009999999999999998
          kl: 0.017929536704290108
          policy_loss: -0.03888000225027402
          total_loss: 0.10749236308038235
          vf_explained_var: 0.4049966633319855
          vf_loss: 0.16082401523987452
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 1790

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,179,4264.03,179000,-3.8869,0.39,-11.81,375.21


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-26_22-21-00
  done: false
  episode_len_mean: 374.8
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -3.9005999999999648
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 2
  episodes_total: 499
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.258635762002733
          entropy_coeff: 0.009999999999999998
          kl: 0.012708582410564074
          policy_loss: -0.01198444426473644
          total_loss: 0.10606287601921294
          vf_explained_var: 0.6823891401290894
          vf_loss: 0.13452667316628827
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 18000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,180,4285.13,180000,-3.9006,0.39,-11.81,374.8


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-26_22-21-21
  done: false
  episode_len_mean: 374.68
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -3.9367999999999648
  episode_reward_min: -11.809999999999926
  episodes_this_iter: 3
  episodes_total: 502
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3250065777036877
          entropy_coeff: 0.009999999999999998
          kl: 0.0106948583207553
          policy_loss: 0.07895064685079786
          total_loss: 0.1417471756744716
          vf_explained_var: 0.5769373774528503
          vf_loss: 0.0809072665249308
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,181,4305.86,181000,-3.9368,0.39,-11.81,374.68


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-26_22-21-42
  done: false
  episode_len_mean: 374.13
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -3.842899999999966
  episode_reward_min: -9.50999999999994
  episodes_this_iter: 3
  episodes_total: 505
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2717790921529133
          entropy_coeff: 0.009999999999999998
          kl: 0.011950098698582224
          policy_loss: -0.0493016988866859
          total_loss: 0.080889289929635
          vf_explained_var: 0.46030837297439575
          vf_loss: 0.1471662582208713
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,182,4327.06,182000,-3.8429,0.39,-9.51,374.13


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-26_22-22-03
  done: false
  episode_len_mean: 374.81
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -3.848999999999967
  episode_reward_min: -9.50999999999994
  episodes_this_iter: 2
  episodes_total: 507
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.358477012316386
          entropy_coeff: 0.009999999999999998
          kl: 0.008634620526751884
          policy_loss: -0.2565384913235903
          total_loss: -0.21339114242129856
          vf_explained_var: 0.4352845549583435
          vf_loss: 0.062582820146862
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,183,4348.18,183000,-3.849,0.39,-9.51,374.81




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-26_22-22-43
  done: false
  episode_len_mean: 373.24
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -3.9012999999999662
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 510
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3476116206910875
          entropy_coeff: 0.009999999999999998
          kl: 0.007596225792022141
          policy_loss: -0.12164669334888459
          total_loss: -0.04499437045305967
          vf_explained_var: 0.4617255628108978
          vf_loss: 0.09647813369002607
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,184,4387.92,184000,-3.9013,0.39,-10.29,373.24


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-26_22-23-05
  done: false
  episode_len_mean: 372.88
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -3.9765999999999666
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 513
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2971659077538384
          entropy_coeff: 0.009999999999999998
          kl: 0.011014162183964792
          policy_loss: -0.05017374969191021
          total_loss: 0.059851547620362704
          vf_explained_var: 0.2668967843055725
          vf_loss: 0.12770418785512447
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,185,4409.73,185000,-3.9766,0.39,-10.29,372.88


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-26_22-23-27
  done: false
  episode_len_mean: 372.91
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -4.053899999999967
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 516
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.317429116037157
          entropy_coeff: 0.009999999999999998
          kl: 0.011392729937054943
          policy_loss: -0.12237265701923106
          total_loss: 0.053942916852732496
          vf_explained_var: 0.386945515871048
          vf_loss: 0.19401517907778423
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 18600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,186,4431.69,186000,-4.0539,0.39,-10.29,372.91


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-26_22-23-50
  done: false
  episode_len_mean: 371.99
  episode_media: {}
  episode_reward_max: 0.39000000000002766
  episode_reward_mean: -4.083899999999966
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 519
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3067133797539605
          entropy_coeff: 0.009999999999999998
          kl: 0.01096910906279906
          policy_loss: -0.09385113327039613
          total_loss: -0.03925350838237339
          vf_explained_var: 0.34588077664375305
          vf_loss: 0.0723936401721504
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 1870

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,187,4455.19,187000,-4.0839,0.39,-10.29,371.99




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-26_22-24-58
  done: false
  episode_len_mean: 371.45
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.0341999999999665
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 522
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2656992382473415
          entropy_coeff: 0.009999999999999998
          kl: 0.019615406809779322
          policy_loss: -0.039596232606305015
          total_loss: 0.37991645245088473
          vf_explained_var: 0.47088441252708435
          vf_loss: 0.432743650343683
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 1880

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,188,4523.24,188000,-4.0342,2.79,-10.29,371.45


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-26_22-25-23
  done: false
  episode_len_mean: 370.76
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.085199999999967
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 525
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2647224108378095
          entropy_coeff: 0.009999999999999998
          kl: 0.015595998042305591
          policy_loss: -0.05416837069723341
          total_loss: 0.04078332525160577
          vf_explained_var: 0.41937583684921265
          vf_loss: 0.11010438886781533
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 1890

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,189,4548.23,189000,-4.0852,2.79,-10.29,370.76


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-26_22-25-47
  done: false
  episode_len_mean: 370.65
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.131699999999966
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 528
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.209772239791022
          entropy_coeff: 0.009999999999999998
          kl: 0.012813097350702357
          policy_loss: 0.0011765957706504398
          total_loss: 0.12260570956601037
          vf_explained_var: 0.43081361055374146
          vf_loss: 0.13736960738897325
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 1900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,190,4572.37,190000,-4.1317,2.79,-10.29,370.65


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-26_22-26-11
  done: false
  episode_len_mean: 369.45
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.150699999999967
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 531
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.326027923160129
          entropy_coeff: 0.009999999999999998
          kl: 0.010881531996858367
          policy_loss: 0.026702500383059182
          total_loss: 0.049789409504996406
          vf_explained_var: 0.5300736427307129
          vf_loss: 0.041118158120661975
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 1910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,191,4595.81,191000,-4.1507,2.79,-10.29,369.45


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-26_22-26-34
  done: false
  episode_len_mean: 368.86
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.092999999999966
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 534
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3287371423509384
          entropy_coeff: 0.009999999999999998
          kl: 0.01076391709500782
          policy_loss: -0.08995789285335276
          total_loss: 0.08156086363726192
          vf_explained_var: 0.1319604068994522
          vf_loss: 0.18963361146549385
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,192,4619.03,192000,-4.093,2.79,-10.29,368.86


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-26_22-26-58
  done: false
  episode_len_mean: 368.3
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.097099999999966
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 537
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3529561281204225
          entropy_coeff: 0.009999999999999998
          kl: 0.011997428407114017
          policy_loss: -0.08751395667592685
          total_loss: -0.054398456464211146
          vf_explained_var: 0.5230932235717773
          vf_loss: 0.050879791068534054
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,193,4642.44,193000,-4.0971,2.79,-10.29,368.3


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-26_22-27-19
  done: false
  episode_len_mean: 367.76
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.090499999999967
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 540
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2978264649709064
          entropy_coeff: 0.009999999999999998
          kl: 0.013837889903458404
          policy_loss: 0.020679524085587927
          total_loss: 0.09658315773639414
          vf_explained_var: 0.5921259522438049
          vf_loss: 0.09223221130669117
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 19400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,194,4664.28,194000,-4.0905,2.79,-10.29,367.76


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-26_22-27-43
  done: false
  episode_len_mean: 366.34
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.086699999999968
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 543
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2708101352055867
          entropy_coeff: 0.009999999999999998
          kl: 0.010794993380585993
          policy_loss: -0.16321659949090744
          total_loss: -0.16141261474953758
          vf_explained_var: 0.6841334700584412
          vf_loss: 0.01932463672839933
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 1950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,195,4687.68,195000,-4.0867,2.79,-10.29,366.34


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-26_22-28-06
  done: false
  episode_len_mean: 365.42
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.077699999999967
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 2
  episodes_total: 545
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.152109220292833
          entropy_coeff: 0.009999999999999998
          kl: 0.011511431184763991
          policy_loss: -0.048033137950632306
          total_loss: -0.055042925808164805
          vf_explained_var: 0.8246409893035889
          vf_loss: 0.008979575294587348
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,196,4711.07,196000,-4.0777,2.79,-10.29,365.42


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-26_22-28-28
  done: false
  episode_len_mean: 364.84
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.071899999999967
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 548
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.069192265139686
          entropy_coeff: 0.009999999999999998
          kl: 0.011110005383036508
          policy_loss: -0.026423815058337317
          total_loss: -0.03271177320016755
          vf_explained_var: 0.7799867391586304
          vf_loss: 0.009065139619633556
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,197,4732.77,197000,-4.0719,2.79,-10.29,364.84




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-26_22-29-08
  done: false
  episode_len_mean: 364.45
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.0683999999999685
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 551
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1695056200027465
          entropy_coeff: 0.009999999999999998
          kl: 0.014873751040050405
          policy_loss: 0.10862586134009891
          total_loss: 0.10106382320324579
          vf_explained_var: 0.8153120279312134
          vf_loss: 0.0069855597361715305
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,198,4772.73,198000,-4.0684,2.79,-10.29,364.45


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-26_22-29-30
  done: false
  episode_len_mean: 362.9
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.053099999999969
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 554
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.100461739963955
          entropy_coeff: 0.009999999999999998
          kl: 0.014333649627081554
          policy_loss: 0.06556548807356093
          total_loss: 0.056568961052431
          vf_explained_var: 0.7184872031211853
          vf_loss: 0.005120167087039186
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,199,4794.33,199000,-4.0531,2.79,-10.29,362.9


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-26_22-29-52
  done: false
  episode_len_mean: 361.29
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.027499999999968
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 557
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.141997146606445
          entropy_coeff: 0.009999999999999998
          kl: 0.011658645740282204
          policy_loss: 0.03930358356899685
          total_loss: 0.032903994454277885
          vf_explained_var: 0.3105916678905487
          vf_loss: 0.009417909244075418
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 20000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,200,4816.8,200000,-4.0275,2.79,-10.29,361.29


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-26_22-30-14
  done: false
  episode_len_mean: 360.19
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -4.016699999999969
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 560
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0675991906060114
          entropy_coeff: 0.009999999999999998
          kl: 0.007807518450098256
          policy_loss: 0.03675085372394986
          total_loss: 0.027453429996967316
          vf_explained_var: 0.47630763053894043
          vf_loss: 0.007626721603770016
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,201,4839.03,201000,-4.0167,2.79,-10.29,360.19


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-26_22-30-37
  done: false
  episode_len_mean: 358.82
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.991599999999969
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 563
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1341961834165786
          entropy_coeff: 0.009999999999999998
          kl: 0.014468822069443298
          policy_loss: -0.006849668795863787
          total_loss: 0.1357834299819337
          vf_explained_var: 0.20611029863357544
          vf_loss: 0.15702218379721875
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 2020

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,202,4861.44,202000,-3.9916,2.79,-10.29,358.82


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-26_22-30-59
  done: false
  episode_len_mean: 356.72
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.9513999999999694
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 566
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1342698203192816
          entropy_coeff: 0.009999999999999998
          kl: 0.011440754328114621
          policy_loss: 0.015735490454567805
          total_loss: 0.013093497355779011
          vf_explained_var: 0.3893946707248688
          vf_loss: 0.01320294319302775
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,203,4884.13,203000,-3.9514,2.79,-10.29,356.72


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-26_22-31-22
  done: false
  episode_len_mean: 355.32
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.9683999999999684
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 569
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.074854935540093
          entropy_coeff: 0.009999999999999998
          kl: 0.006103807952001164
          policy_loss: 0.02047874712281757
          total_loss: 0.010403881470362345
          vf_explained_var: -0.06300951540470123
          vf_loss: 0.0077405487122531565
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,204,4906.97,204000,-3.9684,2.79,-10.29,355.32


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-26_22-31-45
  done: false
  episode_len_mean: 353.64
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.9317999999999693
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 572
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1357473373413085
          entropy_coeff: 0.009999999999999998
          kl: 0.008508033445097313
          policy_loss: 0.0495820391509268
          total_loss: 0.03989131409260962
          vf_explained_var: 0.3005598783493042
          vf_loss: 0.007578277089891748
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 20500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,205,4929.7,205000,-3.9318,2.79,-10.29,353.64


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-26_22-32-08
  done: false
  episode_len_mean: 352.05
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.9172999999999694
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 575
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1773431115680273
          entropy_coeff: 0.009999999999999998
          kl: 0.008277449572305941
          policy_loss: 0.024604091048240663
          total_loss: 0.016373431268665525
          vf_explained_var: -0.08250058442354202
          vf_loss: 0.009565108163385756
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,206,4952.61,206000,-3.9173,2.79,-10.29,352.05


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-26_22-32-30
  done: false
  episode_len_mean: 350.27
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.89049999999997
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 578
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.099393873744541
          entropy_coeff: 0.009999999999999998
          kl: 0.010654267721008795
          policy_loss: 0.029064346849918366
          total_loss: 0.024096226278278562
          vf_explained_var: 0.4127701222896576
          vf_loss: 0.010905994738762578
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 20700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,207,4975.01,207000,-3.8905,2.79,-10.29,350.27




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-26_22-33-11
  done: false
  episode_len_mean: 349.25
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.8802999999999703
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 581
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0656408031781512
          entropy_coeff: 0.009999999999999998
          kl: 0.00850700425383807
          policy_loss: 0.03999806154105398
          total_loss: 0.028642282138268152
          vf_explained_var: 0.5763236284255981
          vf_loss: 0.005212655550955484
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 2080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,208,5015.71,208000,-3.8803,2.79,-10.29,349.25


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-26_22-33-34
  done: false
  episode_len_mean: 347.67
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.8760999999999695
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 584
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.192266578144497
          entropy_coeff: 0.009999999999999998
          kl: 0.016402153856111646
          policy_loss: -0.013703177703751459
          total_loss: -0.01582478168937895
          vf_explained_var: 0.4648977518081665
          vf_loss: 0.011919133911012776
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,209,5038.84,209000,-3.8761,2.79,-10.29,347.67


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-26_22-33-58
  done: false
  episode_len_mean: 345.47
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.87419999999997
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 587
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.065255833996667
          entropy_coeff: 0.009999999999999998
          kl: 0.0065728630602395295
          policy_loss: 0.04987748016913732
          total_loss: 0.03852434915800889
          vf_explained_var: 0.6650003790855408
          vf_loss: 0.006140889134258032
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,210,5062.82,210000,-3.8742,2.79,-10.29,345.47


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-26_22-34-20
  done: false
  episode_len_mean: 344.96
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.8790999999999696
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 590
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.114531644185384
          entropy_coeff: 0.009999999999999998
          kl: 0.00869534239371773
          policy_loss: 0.06513691188560591
          total_loss: 0.058855331357982425
          vf_explained_var: 0.237808495759964
          vf_loss: 0.010685259517696168
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,211,5084.21,211000,-3.8791,2.79,-10.29,344.96


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-26_22-34-42
  done: false
  episode_len_mean: 344.1
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.8433999999999715
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 593
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2367808394961886
          entropy_coeff: 0.009999999999999998
          kl: 0.004763234768003195
          policy_loss: 0.04652092845903503
          total_loss: 0.03233851504822572
          vf_explained_var: 0.7468599677085876
          vf_loss: 0.005896460440837675
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 21200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,212,5106.44,212000,-3.8434,2.79,-10.29,344.1


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-26_22-35-04
  done: false
  episode_len_mean: 343.54
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.8284999999999707
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 596
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 2.158386935128106
          entropy_coeff: 0.009999999999999998
          kl: 0.0077107715212253285
          policy_loss: -0.0009628848483165105
          total_loss: -0.004381926957931784
          vf_explained_var: 0.30280423164367676
          vf_loss: 0.016312153632235198
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,213,5128.71,213000,-3.8285,2.79,-10.29,343.54


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-26_22-35-25
  done: false
  episode_len_mean: 342.92
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.7952999999999713
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 2
  episodes_total: 598
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 2.101566770341661
          entropy_coeff: 0.009999999999999998
          kl: 0.0177845626443281
          policy_loss: -0.12184589323070315
          total_loss: -0.1274376965645287
          vf_explained_var: 0.37691056728363037
          vf_loss: 0.011150745762926009
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 21400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,214,5149.7,214000,-3.7953,2.79,-10.29,342.92


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-26_22-35-48
  done: false
  episode_len_mean: 341.88
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.7081999999999713
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 3
  episodes_total: 601
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 2.06307536760966
          entropy_coeff: 0.009999999999999998
          kl: 0.008189988800047605
          policy_loss: -0.10643710676166747
          total_loss: -0.11444124852617582
          vf_explained_var: 0.5458593368530273
          vf_loss: 0.010658795200288295
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 2150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,215,5172.18,215000,-3.7082,2.79,-10.29,341.88


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-26_22-36-11
  done: false
  episode_len_mean: 340.6
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.7069999999999728
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 4
  episodes_total: 605
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.9984996610217625
          entropy_coeff: 0.009999999999999998
          kl: 0.014352168694954036
          policy_loss: -0.05995497206846873
          total_loss: -0.06667070537805557
          vf_explained_var: 0.6363044381141663
          vf_loss: 0.009820852149277925
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,216,5195.46,216000,-3.707,2.79,-10.29,340.6


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-26_22-36-34
  done: false
  episode_len_mean: 339.05
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.6921999999999717
  episode_reward_min: -10.289999999999965
  episodes_this_iter: 2
  episodes_total: 607
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 2.0160121334923637
          entropy_coeff: 0.009999999999999998
          kl: 0.004150191670655337
          policy_loss: -0.1354136394129859
          total_loss: -0.14934945305188496
          vf_explained_var: 0.8271165490150452
          vf_loss: 0.005227137463710581
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,217,5218.15,217000,-3.6922,2.79,-10.29,339.05




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-26_22-37-14
  done: false
  episode_len_mean: 339.47
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.5666999999999716
  episode_reward_min: -8.71999999999995
  episodes_this_iter: 4
  episodes_total: 611
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9210202813148498
          entropy_coeff: 0.009999999999999998
          kl: 0.010373790971761564
          policy_loss: -0.05387344252732065
          total_loss: -0.05567632226480378
          vf_explained_var: 0.5382281541824341
          vf_loss: 0.01616105933466719
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 2180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,218,5258.36,218000,-3.5667,2.79,-8.72,339.47


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-26_22-37-35
  done: false
  episode_len_mean: 339.55
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.606499999999972
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 613
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.9815361579259236
          entropy_coeff: 0.009999999999999998
          kl: 0.018087265455498276
          policy_loss: -0.04815838618410958
          total_loss: 0.03957497767276234
          vf_explained_var: 0.45640936493873596
          vf_loss: 0.10537580531090499
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 2190

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,219,5279.51,219000,-3.6065,2.79,-8.78,339.55


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-26_22-37-59
  done: false
  episode_len_mean: 339.02
  episode_media: {}
  episode_reward_max: 2.7900000000000116
  episode_reward_mean: -3.5334999999999726
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 616
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.8415015008714464
          entropy_coeff: 0.009999999999999998
          kl: 0.01318392714093785
          policy_loss: -0.08345170766115188
          total_loss: -0.09378802428642909
          vf_explained_var: 0.8162569403648376
          vf_loss: 0.006494837498757988
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,220,5303.26,220000,-3.5335,2.79,-8.78,339.02


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-26_22-38-23
  done: false
  episode_len_mean: 338.31
  episode_media: {}
  episode_reward_max: -0.1799999999999879
  episode_reward_mean: -3.5480999999999723
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 4
  episodes_total: 620
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.809896128707462
          entropy_coeff: 0.009999999999999998
          kl: 0.011859217803880048
          policy_loss: -0.052185601037409574
          total_loss: -0.06372814464072386
          vf_explained_var: 0.8661167621612549
          vf_loss: 0.005131701385188434
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,221,5327.21,221000,-3.5481,-0.18,-8.78,338.31


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-26_22-38-46
  done: false
  episode_len_mean: 337.72
  episode_media: {}
  episode_reward_max: -0.1799999999999879
  episode_reward_mean: -3.5074999999999714
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 623
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.7324645876884461
          entropy_coeff: 0.009999999999999998
          kl: 0.010830905519136439
          policy_loss: -0.020608162652287217
          total_loss: -0.027802445677419504
          vf_explained_var: 0.3982659578323364
          vf_loss: 0.008829185810625657
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,222,5349.92,222000,-3.5075,-0.18,-8.78,337.72


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-26_22-39-09
  done: false
  episode_len_mean: 337.04
  episode_media: {}
  episode_reward_max: -0.1799999999999879
  episode_reward_mean: -3.463099999999973
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 626
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12013549804687496
          cur_lr: 5.000000000000001e-05
          entropy: 1.978050336572859
          entropy_coeff: 0.009999999999999998
          kl: 0.020047072447940194
          policy_loss: 0.0717071005039745
          total_loss: 0.05793831340140766
          vf_explained_var: 0.9238448143005371
          vf_loss: 0.0036033517758672436
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 22300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,223,5373.54,223000,-3.4631,-0.18,-8.78,337.04


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-26_22-39-31
  done: false
  episode_len_mean: 337.37
  episode_media: {}
  episode_reward_max: -0.1799999999999879
  episode_reward_mean: -3.4467999999999734
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 629
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18020324707031254
          cur_lr: 5.000000000000001e-05
          entropy: 2.024118537373013
          entropy_coeff: 0.009999999999999998
          kl: 0.020033680257757917
          policy_loss: 0.015619089951117833
          total_loss: 0.01850550700392988
          vf_explained_var: 0.7445630431175232
          vf_loss: 0.019517471152357756
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,224,5395.6,224000,-3.4468,-0.18,-8.78,337.37


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-26_22-39-53
  done: false
  episode_len_mean: 338.19
  episode_media: {}
  episode_reward_max: -1.4500000000000026
  episode_reward_mean: -3.4552999999999723
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 632
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 1.9279865529802112
          entropy_coeff: 0.009999999999999998
          kl: 0.014843102293075934
          policy_loss: -0.0748923088527388
          total_loss: -0.04322464296387302
          vf_explained_var: 0.4696248769760132
          vf_loss: 0.04693536836033066
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 2250

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,225,5416.76,225000,-3.4553,-1.45,-8.78,338.19


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-26_22-40-15
  done: false
  episode_len_mean: 338.38
  episode_media: {}
  episode_reward_max: -2.199999999999985
  episode_reward_mean: -3.466899999999973
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 634
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 1.899841276804606
          entropy_coeff: 0.009999999999999998
          kl: 0.011333741658115572
          policy_loss: -0.11672495942976739
          total_loss: -0.07001879453245137
          vf_explained_var: 0.3532034158706665
          vf_loss: 0.0626410121512082
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,226,5438.81,226000,-3.4669,-2.2,-8.78,338.38


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-26_22-40-37
  done: false
  episode_len_mean: 338.42
  episode_media: {}
  episode_reward_max: -2.199999999999985
  episode_reward_mean: -3.4777999999999722
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 637
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 1.883231516679128
          entropy_coeff: 0.009999999999999998
          kl: 0.013760982272449181
          policy_loss: -0.1041947594533364
          total_loss: -0.09048558734357356
          vf_explained_var: 0.7040407657623291
          vf_loss: 0.02882182974782255
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,227,5461.06,227000,-3.4778,-2.2,-8.78,338.42




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-26_22-41-13
  done: false
  episode_len_mean: 338.34
  episode_media: {}
  episode_reward_max: -2.199999999999985
  episode_reward_mean: -3.449599999999972
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 640
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 1.9977255331145392
          entropy_coeff: 0.009999999999999998
          kl: 0.024578442273072633
          policy_loss: 0.07152785174548626
          total_loss: 0.08231454040441248
          vf_explained_var: 0.44066542387008667
          vf_loss: 0.024120271214956624
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 22800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,228,5497.5,228000,-3.4496,-2.2,-8.78,338.34


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-26_22-41-37
  done: false
  episode_len_mean: 338.93
  episode_media: {}
  episode_reward_max: -2.199999999999985
  episode_reward_mean: -3.5127999999999724
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 643
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.9479670166969298
          entropy_coeff: 0.009999999999999998
          kl: 0.008876115895641324
          policy_loss: -0.012914443098836475
          total_loss: 0.08555543824202484
          vf_explained_var: 0.3197985887527466
          vf_loss: 0.11435066221488846
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,229,5521.05,229000,-3.5128,-2.2,-8.78,338.93


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-26_22-41-59
  done: false
  episode_len_mean: 339.08
  episode_media: {}
  episode_reward_max: -2.199999999999985
  episode_reward_mean: -3.523599999999972
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 646
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 2.035753643512726
          entropy_coeff: 0.009999999999999998
          kl: 0.008894434671686016
          policy_loss: 0.07952289026644495
          total_loss: 0.10574816134240893
          vf_explained_var: 0.26480183005332947
          vf_loss: 0.0429764933573703
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,230,5542.9,230000,-3.5236,-2.2,-8.78,339.08


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-26_22-42-19
  done: false
  episode_len_mean: 338.81
  episode_media: {}
  episode_reward_max: -2.199999999999985
  episode_reward_mean: -3.5400999999999727
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 648
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.8303581568929883
          entropy_coeff: 0.009999999999999998
          kl: 0.008790738995752929
          policy_loss: -0.078284507110301
          total_loss: -0.05286247158009145
          vf_explained_var: 0.4416642189025879
          vf_loss: 0.040161346043977475
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 2310

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,231,5563.36,231000,-3.5401,-2.2,-8.78,338.81


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-26_22-42-39
  done: false
  episode_len_mean: 340.15
  episode_media: {}
  episode_reward_max: -1.849999999999978
  episode_reward_mean: -3.5508999999999724
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 651
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.893789972199334
          entropy_coeff: 0.009999999999999998
          kl: 0.015066755564951961
          policy_loss: -0.0578368893927998
          total_loss: -0.0034803330898284914
          vf_explained_var: 0.16017389297485352
          vf_loss: 0.06718552800723249
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,232,5583.52,232000,-3.5509,-1.85,-8.78,340.15


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-26_22-42-57
  done: false
  episode_len_mean: 342.94
  episode_media: {}
  episode_reward_max: -1.849999999999978
  episode_reward_mean: -3.577799999999972
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 653
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.5944139745500352
          entropy_coeff: 0.009999999999999998
          kl: 0.013888559555523313
          policy_loss: 0.006040028731028239
          total_loss: 0.03479443697465791
          vf_explained_var: -0.0043597472831606865
          vf_loss: 0.039067333581139486
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,233,5600.72,233000,-3.5778,-1.85,-8.78,342.94


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-26_22-43-16
  done: false
  episode_len_mean: 344.55
  episode_media: {}
  episode_reward_max: -1.849999999999978
  episode_reward_mean: -3.581099999999972
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 656
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.8008521132998996
          entropy_coeff: 0.009999999999999998
          kl: 0.014970227584410958
          policy_loss: -0.07508861124515534
          total_loss: 0.021968484307742783
          vf_explained_var: 0.28931280970573425
          vf_loss: 0.10899582744265596
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,234,5620.42,234000,-3.5811,-1.85,-8.78,344.55


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-26_22-43-35
  done: false
  episode_len_mean: 345.68
  episode_media: {}
  episode_reward_max: -1.849999999999978
  episode_reward_mean: -3.5906999999999716
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 658
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.855696631802453
          entropy_coeff: 0.009999999999999998
          kl: 0.015119419447556827
          policy_loss: -0.025825649251540503
          total_loss: 0.01495686024427414
          vf_explained_var: 0.5931426286697388
          vf_loss: 0.053209202809052336
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,235,5639.47,235000,-3.5907,-1.85,-8.78,345.68




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-26_22-44-36
  done: false
  episode_len_mean: 345.82
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.552599999999971
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 661
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.7938179095586142
          entropy_coeff: 0.009999999999999998
          kl: 0.01852198924015276
          policy_loss: -0.07435986383093728
          total_loss: 0.007390204403135512
          vf_explained_var: 0.3033483028411865
          vf_loss: 0.09217837183839744
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,236,5700.15,236000,-3.5526,4.08,-8.78,345.82


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-26_22-44-54
  done: false
  episode_len_mean: 347.88
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.5903999999999705
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 663
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.9026661025153266
          entropy_coeff: 0.009999999999999998
          kl: 0.01262270338643224
          policy_loss: -0.016675270017650394
          total_loss: 0.05781119283702638
          vf_explained_var: 0.4044830799102783
          vf_loss: 0.08839515590419372
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 23700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,237,5718.01,237000,-3.5904,4.08,-8.78,347.88


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-26_22-45-13
  done: false
  episode_len_mean: 349.72
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.6075999999999704
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 665
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.8114957796202766
          entropy_coeff: 0.009999999999999998
          kl: 0.00701565209127389
          policy_loss: -0.05926168113946915
          total_loss: -0.06172801098889775
          vf_explained_var: 0.38781118392944336
          vf_loss: 0.012804080188895265
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,238,5736.44,238000,-3.6076,4.08,-8.78,349.72


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-26_22-45-30
  done: false
  episode_len_mean: 352.72
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.64229999999997
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 668
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 2.0215247366163465
          entropy_coeff: 0.009999999999999998
          kl: 0.008950883002519896
          policy_loss: -0.05788168762293127
          total_loss: 0.016433541932039792
          vf_explained_var: 0.23435591161251068
          vf_loss: 0.09090127493772242
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 23900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,239,5754,239000,-3.6423,4.08,-8.78,352.72


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-26_22-45-48
  done: false
  episode_len_mean: 355.47
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.6571999999999694
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 670
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.970040085580614
          entropy_coeff: 0.009999999999999998
          kl: 0.012209607921028681
          policy_loss: -0.034501176327466965
          total_loss: 0.021816291991207333
          vf_explained_var: 0.26587730646133423
          vf_loss: 0.07106739067369038
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,240,5771.69,240000,-3.6572,4.08,-8.78,355.47


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-26_22-46-05
  done: false
  episode_len_mean: 358.11
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.682199999999969
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 672
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.937909444173177
          entropy_coeff: 0.009999999999999998
          kl: 0.009266464388253315
          policy_loss: -0.027876437620984185
          total_loss: -0.004898440796467993
          vf_explained_var: 0.1942243129014969
          vf_loss: 0.03859993639505572
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 2410

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,241,5788.92,241000,-3.6822,4.08,-8.78,358.11


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-26_22-46-27
  done: false
  episode_len_mean: 358.82
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.688099999999969
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 675
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.8410458445549012
          entropy_coeff: 0.009999999999999998
          kl: 0.009917044729784935
          policy_loss: -0.003293954332669576
          total_loss: -0.0073127454353703394
          vf_explained_var: 0.7656556963920593
          vf_loss: 0.01037072787552865
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 24

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,242,5810.46,242000,-3.6881,4.08,-8.78,358.82


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-26_22-46-45
  done: false
  episode_len_mean: 360.54
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.6835999999999682
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 677
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.8078768836127388
          entropy_coeff: 0.009999999999999998
          kl: 0.010272192665223044
          policy_loss: -0.08172532220681508
          total_loss: 0.006545289419591427
          vf_explained_var: 0.4513828754425049
          vf_loss: 0.10218444439686007
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 2430

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,243,5829,243000,-3.6836,4.08,-8.78,360.54


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-26_22-47-05
  done: false
  episode_len_mean: 362.85
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.6842999999999675
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 680
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.9069171918763055
          entropy_coeff: 0.009999999999999998
          kl: 0.02162707065403146
          policy_loss: -0.11482556571977007
          total_loss: 0.0290543626062572
          vf_explained_var: 0.6183449029922485
          vf_loss: 0.15418024179525674
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,244,5848.81,244000,-3.6843,4.08,-8.78,362.85


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-26_22-47-25
  done: false
  episode_len_mean: 364.57
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.660899999999968
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 682
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.089949515130785
          entropy_coeff: 0.009999999999999998
          kl: 0.009874281095368096
          policy_loss: -0.1280183591776424
          total_loss: -0.08721468026439348
          vf_explained_var: 0.374420702457428
          vf_loss: 0.05569776897836062
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,245,5869.13,245000,-3.6609,4.08,-8.78,364.57


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-26_22-47-44
  done: false
  episode_len_mean: 367.45
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.688499999999968
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 685
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.1094284150335523
          entropy_coeff: 0.009999999999999998
          kl: 0.010876534560299007
          policy_loss: 0.01648150355451637
          total_loss: 0.020132199219531483
          vf_explained_var: 0.37188538908958435
          vf_loss: 0.0181300260230071
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,246,5887.57,246000,-3.6885,4.08,-8.78,367.45


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-26_22-48-02
  done: false
  episode_len_mean: 370.88
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.7281999999999673
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 687
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8958124412430657
          entropy_coeff: 0.009999999999999998
          kl: 0.010902524351551562
          policy_loss: 0.032059089342753096
          total_loss: 0.17802880006945795
          vf_explained_var: 0.4830828011035919
          vf_loss: 0.15829706920517816
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,247,5905.36,247000,-3.7282,4.08,-8.78,370.88


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-26_22-48-20
  done: false
  episode_len_mean: 372.83
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.753799999999967
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 689
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 2.01229987276925
          entropy_coeff: 0.009999999999999998
          kl: 0.013248487601995046
          policy_loss: -0.0204683985768093
          total_loss: 0.033724678080115054
          vf_explained_var: 0.5422191023826599
          vf_loss: 0.06625853049465352
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,248,5924.17,248000,-3.7538,4.08,-8.78,372.83




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-26_22-48-58
  done: false
  episode_len_mean: 373.64
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.7596999999999663
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 691
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.8642124149534438
          entropy_coeff: 0.009999999999999998
          kl: 0.008924068280047947
          policy_loss: -0.12895106942289405
          total_loss: -0.09348558224737644
          vf_explained_var: 0.6128149032592773
          vf_loss: 0.04868011558428407
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 24900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,249,5961.54,249000,-3.7597,4.08,-8.78,373.64




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-26_22-49-59
  done: false
  episode_len_mean: 374.59
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.687499999999966
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 694
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.9541585021548802
          entropy_coeff: 0.009999999999999998
          kl: 0.02429959009591243
          policy_loss: -0.12467897459864616
          total_loss: 0.02547127016716533
          vf_explained_var: 0.37317797541618347
          vf_loss: 0.15491316169500352
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,250,6022.4,250000,-3.6875,4.08,-8.78,374.59


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-26_22-50-18
  done: false
  episode_len_mean: 376.77
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.7069999999999665
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 697
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8123698936568367
          entropy_coeff: 0.009999999999999998
          kl: 0.007469793183118867
          policy_loss: 0.05636573632558187
          total_loss: 0.06500961755712827
          vf_explained_var: 0.5512741804122925
          vf_loss: 0.019953043262163798
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,251,6041.76,251000,-3.707,4.08,-8.78,376.77


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-26_22-50-35
  done: false
  episode_len_mean: 378.74
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.7442999999999658
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 699
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.9543842448128594
          entropy_coeff: 0.009999999999999998
          kl: 0.014370910585459602
          policy_loss: -0.0012163687083456252
          total_loss: 0.14266624657644167
          vf_explained_var: 0.4830244183540344
          vf_loss: 0.15031618169612354
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 2520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,252,6059.01,252000,-3.7443,4.08,-8.78,378.74


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-26_22-50-55
  done: false
  episode_len_mean: 379.94
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.7739999999999663
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 701
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8602267543474833
          entropy_coeff: 0.009999999999999998
          kl: 0.007891239731276818
          policy_loss: -0.08838021068109406
          total_loss: -0.009950821474194526
          vf_explained_var: 0.49021753668785095
          vf_loss: 0.08983264503379663
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,253,6078.66,253000,-3.774,4.08,-8.78,379.94


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-26_22-51-10
  done: false
  episode_len_mean: 383.64
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.838099999999966
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 703
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.425760895676083
          entropy_coeff: 0.009999999999999998
          kl: 0.007025114400166347
          policy_loss: 0.03910549332698186
          total_loss: 0.05671714639498128
          vf_explained_var: 0.22049586474895477
          vf_loss: 0.02546039745470302
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,254,6094.06,254000,-3.8381,4.08,-8.78,383.64


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-26_22-51-30
  done: false
  episode_len_mean: 386.38
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.864299999999965
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 3
  episodes_total: 706
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.8441607793172201
          entropy_coeff: 0.009999999999999998
          kl: 0.00969095361802018
          policy_loss: 0.05074947261148029
          total_loss: 0.06656337860557768
          vf_explained_var: 0.41905632615089417
          vf_loss: 0.025414659620987046
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,255,6113.3,255000,-3.8643,4.08,-8.78,386.38


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-26_22-51-48
  done: false
  episode_len_mean: 388.17
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.885299999999964
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 708
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.885148576895396
          entropy_coeff: 0.009999999999999998
          kl: 0.005972760169916608
          policy_loss: 0.02638040797577964
          total_loss: 0.02272107783291075
          vf_explained_var: 0.5931234359741211
          vf_loss: 0.00974333418222765
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,256,6131.06,256000,-3.8853,4.08,-8.78,388.17


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-26_22-52-06
  done: false
  episode_len_mean: 390.37
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.9161999999999635
  episode_reward_min: -8.779999999999964
  episodes_this_iter: 2
  episodes_total: 710
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9122789382934571
          cur_lr: 5.000000000000001e-05
          entropy: 1.9614636937777201
          entropy_coeff: 0.009999999999999998
          kl: 0.0035978840114192006
          policy_loss: 0.098604522107376
          total_loss: 0.1149626531948646
          vf_explained_var: 0.4172840118408203
          vf_loss: 0.032690493928061594
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,257,6149.35,257000,-3.9162,4.08,-8.78,390.37


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-26_22-52-24
  done: false
  episode_len_mean: 392.4
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.9058999999999635
  episode_reward_min: -8.329999999999952
  episodes_this_iter: 2
  episodes_total: 712
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.910293632083469
          entropy_coeff: 0.009999999999999998
          kl: 0.012614739617290946
          policy_loss: -0.030181219345993465
          total_loss: 0.18634410372210874
          vf_explained_var: 0.07750091701745987
          vf_loss: 0.22987417558001147
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 25800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,258,6167.25,258000,-3.9059,4.08,-8.33,392.4


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-26_22-52-42
  done: false
  episode_len_mean: 394.82
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.868399999999964
  episode_reward_min: -8.329999999999952
  episodes_this_iter: 3
  episodes_total: 715
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.8667590883043077
          entropy_coeff: 0.009999999999999998
          kl: 0.011190218574325783
          policy_loss: 0.05217740784088771
          total_loss: 0.07368081203765339
          vf_explained_var: 0.5124814510345459
          vf_loss: 0.035066696608232126
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,259,6185.87,259000,-3.8684,4.08,-8.33,394.82


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-26_22-53-00
  done: false
  episode_len_mean: 398.16
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.901399999999963
  episode_reward_min: -8.329999999999952
  episodes_this_iter: 2
  episodes_total: 717
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.8253410591019525
          entropy_coeff: 0.009999999999999998
          kl: 0.00981389831878027
          policy_loss: -0.032609401270747185
          total_loss: -0.035648805937833254
          vf_explained_var: -0.2469576746225357
          vf_loss: 0.010737497980395952
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,260,6203.07,260000,-3.9014,4.08,-8.33,398.16


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-26_22-53-20
  done: false
  episode_len_mean: 400.68
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.895399999999963
  episode_reward_min: -8.329999999999952
  episodes_this_iter: 3
  episodes_total: 720
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 2.0028325902091133
          entropy_coeff: 0.009999999999999998
          kl: 0.012398245969264544
          policy_loss: -0.031659469174014195
          total_loss: 0.015408214016093148
          vf_explained_var: 0.42138850688934326
          vf_loss: 0.06144067717509137
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,261,6223.19,261000,-3.8954,4.08,-8.33,400.68


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-26_22-53-38
  done: false
  episode_len_mean: 402.36
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.8684999999999627
  episode_reward_min: -8.329999999999952
  episodes_this_iter: 2
  episodes_total: 722
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.9665798637602063
          entropy_coeff: 0.009999999999999998
          kl: 0.012052317955105958
          policy_loss: -0.0024587040146191916
          total_loss: 0.10545194769899051
          vf_explained_var: 0.4113612174987793
          vf_loss: 0.1220789129121436
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 2620

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,262,6241.82,262000,-3.8685,4.08,-8.33,402.36




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-26_22-54-18
  done: false
  episode_len_mean: 403.46
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.9483999999999626
  episode_reward_min: -10.379999999999972
  episodes_this_iter: 3
  episodes_total: 725
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.91213495598899
          entropy_coeff: 0.009999999999999998
          kl: 0.004972552163748054
          policy_loss: -0.25723773058917787
          total_loss: -0.2632149542371432
          vf_explained_var: 0.470896452665329
          vf_loss: 0.010875948673735062
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,263,6281.04,263000,-3.9484,4.08,-10.38,403.46


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-26_22-54-37
  done: false
  episode_len_mean: 404.8
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.921399999999963
  episode_reward_min: -10.379999999999972
  episodes_this_iter: 2
  episodes_total: 727
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22806973457336427
          cur_lr: 5.000000000000001e-05
          entropy: 2.040134225951301
          entropy_coeff: 0.009999999999999998
          kl: 0.014463737427225709
          policy_loss: -0.004636909564336141
          total_loss: 0.019881784584787156
          vf_explained_var: 0.5108748078346252
          vf_loss: 0.04162129515575038
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 26400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,264,6300.42,264000,-3.9214,4.08,-10.38,404.8


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-26_22-54-56
  done: false
  episode_len_mean: 406.94
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -3.9107999999999628
  episode_reward_min: -10.379999999999972
  episodes_this_iter: 3
  episodes_total: 730
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22806973457336427
          cur_lr: 5.000000000000001e-05
          entropy: 1.9754609160953098
          entropy_coeff: 0.009999999999999998
          kl: 0.014018235291322432
          policy_loss: 0.027198329981830385
          total_loss: 0.08400211764706506
          vf_explained_var: 0.5273119211196899
          vf_loss: 0.07336126486253407
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 2650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,265,6319.8,265000,-3.9108,4.08,-10.38,406.94


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-26_22-55-18
  done: false
  episode_len_mean: 406.62
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.0274999999999626
  episode_reward_min: -12.609999999999967
  episodes_this_iter: 3
  episodes_total: 733
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22806973457336427
          cur_lr: 5.000000000000001e-05
          entropy: 1.8424313902854919
          entropy_coeff: 0.009999999999999998
          kl: 0.025099126991818467
          policy_loss: -0.03620023348679145
          total_loss: 0.1786708252090547
          vf_explained_var: 0.3510389029979706
          vf_loss: 0.2275710203167465
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,266,6341.81,266000,-4.0275,4.08,-12.61,406.62


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-26_22-55-37
  done: false
  episode_len_mean: 408.09
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.091899999999963
  episode_reward_min: -12.609999999999967
  episodes_this_iter: 2
  episodes_total: 735
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 1.7969429506195915
          entropy_coeff: 0.009999999999999998
          kl: 0.018449371903025577
          policy_loss: -0.05598214409417576
          total_loss: 0.00937354779905743
          vf_explained_var: 0.6551483869552612
          vf_loss: 0.07701351340446207
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,267,6359.94,267000,-4.0919,4.08,-12.61,408.09


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-26_22-55-57
  done: false
  episode_len_mean: 408.88
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.189199999999962
  episode_reward_min: -12.609999999999967
  episodes_this_iter: 2
  episodes_total: 737
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 1.9179462697770862
          entropy_coeff: 0.009999999999999998
          kl: 0.015334260596213293
          policy_loss: -0.06360326028532452
          total_loss: 0.0020181768470340306
          vf_explained_var: 0.6209217309951782
          vf_loss: 0.07955497944106658
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 2680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,268,6380.49,268000,-4.1892,4.08,-12.61,408.88


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-26_22-56-16
  done: false
  episode_len_mean: 410.55
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.293199999999962
  episode_reward_min: -12.609999999999967
  episodes_this_iter: 3
  episodes_total: 740
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 1.9494385997454324
          entropy_coeff: 0.009999999999999998
          kl: 0.01332436167707694
          policy_loss: -0.02363744436038865
          total_loss: 0.009957784165938696
          vf_explained_var: 0.5847480893135071
          vf_loss: 0.04853129001955191
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,269,6399.47,269000,-4.2932,4.08,-12.61,410.55


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-26_22-56-35
  done: false
  episode_len_mean: 411.95
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.356999999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 742
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.026923867066701
          entropy_coeff: 0.009999999999999998
          kl: 0.015635892380925418
          policy_loss: -0.00742742990454038
          total_loss: 0.26456136379597917
          vf_explained_var: 0.2662096619606018
          vf_loss: 0.2869089209371143
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,270,6418.54,270000,-4.357,4.08,-15.39,411.95




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-26_22-57-34
  done: false
  episode_len_mean: 412.62
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.383799999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 745
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.1083424515194364
          entropy_coeff: 0.009999999999999998
          kl: 0.013526562636547378
          policy_loss: 0.01741482557521926
          total_loss: 0.17562568961746164
          vf_explained_var: 0.3792422413825989
          vf_loss: 0.17466678449677098
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,271,6477.36,271000,-4.3838,4.08,-15.39,412.62


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-26_22-57-56
  done: false
  episode_len_mean: 413.95
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.366999999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 747
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.085288353761037
          entropy_coeff: 0.009999999999999998
          kl: 0.012734075690632531
          policy_loss: -0.11864920092953576
          total_loss: -0.047000334742996426
          vf_explained_var: 0.14363747835159302
          vf_loss: 0.08814536564879948
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 2720

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,272,6498.88,272000,-4.367,4.08,-15.39,413.95




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-26_22-58-44
  done: false
  episode_len_mean: 414.47
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.272099999999963
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 750
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.061022871070438
          entropy_coeff: 0.009999999999999998
          kl: 0.009200073988038238
          policy_loss: 0.08785094080699815
          total_loss: 0.16654431712296275
          vf_explained_var: 0.31881552934646606
          vf_loss: 0.09615621488127443
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,273,6547.09,273000,-4.2721,4.08,-15.39,414.47


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-26_22-59-05
  done: false
  episode_len_mean: 413.72
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.304399999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 752
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.1149016247855292
          entropy_coeff: 0.009999999999999998
          kl: 0.016844171839534298
          policy_loss: -0.06253790474600263
          total_loss: -0.008436658978462219
          vf_explained_var: 0.3061247169971466
          vf_loss: 0.06948779109451506
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 2740

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,274,6568.11,274000,-4.3044,4.08,-15.39,413.72


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-26_22-59-25
  done: false
  episode_len_mean: 414.4
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.323599999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 755
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.2609165959888036
          entropy_coeff: 0.009999999999999998
          kl: 0.01292171791602706
          policy_loss: 0.08125495430496003
          total_loss: 0.08065439106689559
          vf_explained_var: 0.3710860311985016
          vf_loss: 0.017588021657947036
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,275,6587.98,275000,-4.3236,4.08,-15.39,414.4


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-26_22-59-44
  done: false
  episode_len_mean: 414.46
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.325099999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 757
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.262235821617974
          entropy_coeff: 0.009999999999999998
          kl: 0.010574287414886927
          policy_loss: 0.05236445168654124
          total_loss: 0.04548815555042691
          vf_explained_var: 0.3101809620857239
          vf_loss: 0.012128552807391517
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,276,6607.49,276000,-4.3251,4.08,-15.39,414.46


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-26_23-00-04
  done: false
  episode_len_mean: 414.41
  episode_media: {}
  episode_reward_max: 4.080000000000001
  episode_reward_mean: -4.355499999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 759
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.36558833916982
          entropy_coeff: 0.009999999999999998
          kl: 0.006075056504457748
          policy_loss: 0.06074025712700354
          total_loss: 0.07384498756792811
          vf_explained_var: 0.34371039271354675
          vf_loss: 0.034682307219029304
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,277,6627.48,277000,-4.3555,4.08,-15.39,414.41


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-26_23-00-24
  done: false
  episode_len_mean: 416.79
  episode_media: {}
  episode_reward_max: 3.69000000000002
  episode_reward_mean: -4.477699999999961
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 762
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.143610715866089
          entropy_coeff: 0.009999999999999998
          kl: 0.011857775884385694
          policy_loss: -0.06358208076821434
          total_loss: 0.12568037857611974
          vf_explained_var: 0.32505881786346436
          vf_loss: 0.20664197184766334
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,278,6646.72,278000,-4.4777,3.69,-15.39,416.79


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-26_23-00-43
  done: false
  episode_len_mean: 415.7
  episode_media: {}
  episode_reward_max: 3.69000000000002
  episode_reward_mean: -4.487799999999961
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 764
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.2037213113572864
          entropy_coeff: 0.009999999999999998
          kl: 0.011455450938850555
          policy_loss: -0.12783972695469856
          total_loss: -0.11114887620011965
          vf_explained_var: 0.3968803286552429
          vf_loss: 0.03480910582260953
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,279,6666.16,279000,-4.4878,3.69,-15.39,415.7


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-26_23-01-04
  done: false
  episode_len_mean: 413.88
  episode_media: {}
  episode_reward_max: 3.69000000000002
  episode_reward_mean: -4.491599999999961
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 767
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.114264946513706
          entropy_coeff: 0.009999999999999998
          kl: 0.013760541701974856
          policy_loss: 0.06507652501265208
          total_loss: 0.21103116306993697
          vf_explained_var: 0.3139573931694031
          vf_loss: 0.16238974610136614
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,280,6686.83,280000,-4.4916,3.69,-15.39,413.88


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-26_23-01-24
  done: false
  episode_len_mean: 413.03
  episode_media: {}
  episode_reward_max: 3.69000000000002
  episode_reward_mean: -4.454799999999961
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 769
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.327785841623942
          entropy_coeff: 0.009999999999999998
          kl: 0.017241766989858666
          policy_loss: -0.02067667328649097
          total_loss: 0.15838075263632667
          vf_explained_var: 0.4464995861053467
          vf_loss: 0.19643679811722703
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,281,6706.55,281000,-4.4548,3.69,-15.39,413.03


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-26_23-01-44
  done: false
  episode_len_mean: 411.02
  episode_media: {}
  episode_reward_max: 3.69000000000002
  episode_reward_mean: -4.443099999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 772
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.232400001419915
          entropy_coeff: 0.009999999999999998
          kl: 0.017553872293619478
          policy_loss: -5.6584676106770836e-05
          total_loss: 0.13456944906049303
          vf_explained_var: 0.5227845311164856
          vf_loss: 0.15094476826488973
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 28200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,282,6726.61,282000,-4.4431,3.69,-15.39,411.02


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-26_23-02-04
  done: false
  episode_len_mean: 411.17
  episode_media: {}
  episode_reward_max: 3.69000000000002
  episode_reward_mean: -4.414899999999963
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 774
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 1.9256677124235364
          entropy_coeff: 0.009999999999999998
          kl: 0.010596474722818853
          policy_loss: -0.05300898287031386
          total_loss: 0.06538092721667554
          vf_explained_var: 0.49981245398521423
          vf_loss: 0.13402148072297373
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,283,6746.9,283000,-4.4149,3.69,-15.39,411.17


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-26_23-02-26
  done: false
  episode_len_mean: 410.22
  episode_media: {}
  episode_reward_max: 3.69000000000002
  episode_reward_mean: -4.483699999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 777
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 2.1507389730877344
          entropy_coeff: 0.009999999999999998
          kl: 0.030161412134528772
          policy_loss: 0.00881035245127148
          total_loss: 0.2328876337243451
          vf_explained_var: -0.006822675000876188
          vf_loss: 0.23526631519198418
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,284,6768.53,284000,-4.4837,3.69,-15.39,410.22




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-26_23-03-22
  done: false
  episode_len_mean: 405.25
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.3647999999999625
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 4
  episodes_total: 781
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 2.3145055294036867
          entropy_coeff: 0.009999999999999998
          kl: 0.011686353649677588
          policy_loss: 0.1080525173081292
          total_loss: 0.20162430248326726
          vf_explained_var: 0.37640902400016785
          vf_loss: 0.11071990426215861
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,285,6825.39,285000,-4.3648,5.53,-15.39,405.25


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-26_23-03-43
  done: false
  episode_len_mean: 404.31
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.425499999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 784
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 2.1942942089504665
          entropy_coeff: 0.009999999999999998
          kl: 0.01282050867935559
          policy_loss: -0.19532947623067431
          total_loss: -0.1833234128024843
          vf_explained_var: 0.7427030205726624
          vf_loss: 0.027370068617165087
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,286,6845.58,286000,-4.4255,5.53,-15.39,404.31


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-26_23-04-04
  done: false
  episode_len_mean: 403.32
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.388699999999963
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 786
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.825083765718672
          entropy_coeff: 0.009999999999999998
          kl: 0.008159453256955808
          policy_loss: -0.0025841747721036274
          total_loss: -0.013921920789612664
          vf_explained_var: 0.8637131452560425
          vf_loss: 0.0027260143814298015
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,287,6866.42,287000,-4.3887,5.53,-15.39,403.32


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-26_23-04-24
  done: false
  episode_len_mean: 400.53
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.3765999999999625
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 789
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.8612268580330742
          entropy_coeff: 0.009999999999999998
          kl: 0.010640879851243628
          policy_loss: 0.03984911185171869
          total_loss: 0.0310187641531229
          vf_explained_var: 0.5930296182632446
          vf_loss: 0.004321479018674129
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,288,6887.3,288000,-4.3766,5.53,-15.39,400.53


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-26_23-04-45
  done: false
  episode_len_mean: 400.6
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.379699999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 791
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.9465351170963712
          entropy_coeff: 0.009999999999999998
          kl: 0.006667166961030027
          policy_loss: -0.10067998394370078
          total_loss: -0.11261441343360477
          vf_explained_var: 0.46143850684165955
          vf_loss: 0.004109620527985195
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 2890

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,289,6907.6,289000,-4.3797,5.53,-15.39,400.6


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-26_23-05-04
  done: false
  episode_len_mean: 401.49
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.470699999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 794
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.9530833707915412
          entropy_coeff: 0.009999999999999998
          kl: 0.006398008893495898
          policy_loss: 0.06941820846663581
          total_loss: 0.05690849853886498
          vf_explained_var: 0.08836641162633896
          vf_loss: 0.0037379444622072495
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 2900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,290,6926.51,290000,-4.4707,5.53,-15.39,401.49


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-26_23-05-24
  done: false
  episode_len_mean: 400.33
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.4615999999999625
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 797
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.9099188566207885
          entropy_coeff: 0.009999999999999998
          kl: 0.00507121164515504
          policy_loss: 0.120610336224652
          total_loss: 0.1066594530103935
          vf_explained_var: 0.9112955927848816
          vf_loss: 0.0025459759299539856
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,291,6946.77,291000,-4.4616,5.53,-15.39,400.33


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-26_23-05-44
  done: false
  episode_len_mean: 398.42
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.424899999999962
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 799
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.8926772541469998
          entropy_coeff: 0.009999999999999998
          kl: 0.008056046385927433
          policy_loss: -0.09302623122930527
          total_loss: -0.1031689499815305
          vf_explained_var: 0.5272597074508667
          vf_loss: 0.004650035964570836
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 29200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,292,6966.84,292000,-4.4249,5.53,-15.39,398.42


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-26_23-06-03
  done: false
  episode_len_mean: 398.2
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.367999999999963
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 802
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.9535734401808844
          entropy_coeff: 0.009999999999999998
          kl: 0.006973852037221009
          policy_loss: 0.0486263867881563
          total_loss: 0.03887723419401381
          vf_explained_var: 0.286708801984787
          vf_loss: 0.006207904286889566
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,293,6985.68,293000,-4.368,5.53,-15.39,398.2


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-26_23-06-22
  done: false
  episode_len_mean: 395.93
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.345299999999963
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 804
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.9411897646056282
          entropy_coeff: 0.009999999999999998
          kl: 0.014047587843896843
          policy_loss: -0.0471730910655525
          total_loss: 0.05076198784841431
          vf_explained_var: -0.1574135720729828
          vf_loss: 0.11013835924354175
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,294,7004.82,294000,-4.3453,5.53,-15.39,395.93


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-26_23-06-42
  done: false
  episode_len_mean: 395.65
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.343099999999963
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 807
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.849352737267812
          entropy_coeff: 0.009999999999999998
          kl: 0.007770031934407272
          policy_loss: 0.0019649676978588104
          total_loss: -0.00537921032971806
          vf_explained_var: 0.5006734728813171
          vf_loss: 0.007162103408740627
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 2950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,295,7024.27,295000,-4.3431,5.53,-15.39,395.65


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-26_23-07-04
  done: false
  episode_len_mean: 393.24
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.310099999999964
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 810
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.615348948372735
          entropy_coeff: 0.009999999999999998
          kl: 0.004506457111841142
          policy_loss: 0.006541941232151455
          total_loss: -0.0012124780151579115
          vf_explained_var: 0.7153885960578918
          vf_loss: 0.006086549610416922
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,296,7046.36,296000,-4.3101,5.53,-15.39,393.24




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-26_23-07-43
  done: false
  episode_len_mean: 391.33
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.274299999999964
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 2
  episodes_total: 812
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 1.7971637792057462
          entropy_coeff: 0.009999999999999998
          kl: 0.009564549643077699
          policy_loss: -0.11525553382105297
          total_loss: -0.1245700791478157
          vf_explained_var: 0.21835371851921082
          vf_loss: 0.0062030380145491415
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,297,7085.26,297000,-4.2743,5.53,-15.39,391.33


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-26_23-08-05
  done: false
  episode_len_mean: 388.44
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.298199999999963
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 815
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 1.4879438241322835
          entropy_coeff: 0.009999999999999998
          kl: 0.008276456345901965
          policy_loss: -0.05867435874210464
          total_loss: -0.05704856717752086
          vf_explained_var: 0.2995237112045288
          vf_loss: 0.014381670703490576
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 2980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,298,7107.45,298000,-4.2982,5.53,-15.39,388.44


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-26_23-08-31
  done: false
  episode_len_mean: 383.18
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.266999999999964
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 4
  episodes_total: 819
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 1.4925471001201205
          entropy_coeff: 0.009999999999999998
          kl: 0.005721037658847116
          policy_loss: 0.1458091297083431
          total_loss: 0.1409029988778962
          vf_explained_var: 0.6800746321678162
          vf_loss: 0.008551444066688419
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,299,7133.2,299000,-4.267,5.53,-15.39,383.18


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-26_23-08-56
  done: false
  episode_len_mean: 380.55
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.283499999999965
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 822
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 1.7226395183139378
          entropy_coeff: 0.009999999999999998
          kl: 0.02123436528893655
          policy_loss: 0.01916178365548452
          total_loss: 0.1620778633074628
          vf_explained_var: -5.930529732722789e-05
          vf_loss: 0.15469419401552942
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 30000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,300,7158.14,300000,-4.2835,5.53,-15.39,380.55


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-26_23-09-20
  done: false
  episode_len_mean: 379.46
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.163199999999963
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 3
  episodes_total: 825
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.5638733943303427
          entropy_coeff: 0.009999999999999998
          kl: 0.030222635850644625
          policy_loss: -0.03605732247233391
          total_loss: 0.05672811439467801
          vf_explained_var: 0.43455442786216736
          vf_loss: 0.09679245972074568
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 30100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,301,7183,301000,-4.1632,5.53,-15.39,379.46




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-26_23-10-29
  done: false
  episode_len_mean: 372.06
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -4.129699999999965
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 4
  episodes_total: 829
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.495784666803148
          entropy_coeff: 0.009999999999999998
          kl: 0.022934917194812995
          policy_loss: 0.016746337546242607
          total_loss: 0.4383596905403667
          vf_explained_var: 0.3194805681705475
          vf_loss: 0.4233308404684067
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,302,7251.4,302000,-4.1297,5.53,-15.39,372.06




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-26_23-11-35
  done: false
  episode_len_mean: 367.76
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -3.967899999999965
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 4
  episodes_total: 833
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.657802669207255
          entropy_coeff: 0.009999999999999998
          kl: 0.0068204147468965935
          policy_loss: -0.0724066817926036
          total_loss: 0.10688570845458242
          vf_explained_var: 0.4857064485549927
          vf_loss: 0.18996426600755917
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,303,7317.61,303000,-3.9679,5.53,-15.39,367.76


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-26_23-12-03
  done: false
  episode_len_mean: 363.0
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -3.7486999999999653
  episode_reward_min: -15.389999999999944
  episodes_this_iter: 4
  episodes_total: 837
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.7287214159965516
          entropy_coeff: 0.009999999999999998
          kl: 0.009294639405474411
          policy_loss: -0.05952277382214864
          total_loss: 0.13886750555700725
          vf_explained_var: 0.2859959304332733
          vf_loss: 0.20762878213491703
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,304,7345.18,304000,-3.7487,5.53,-15.39,363


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-26_23-12-33
  done: false
  episode_len_mean: 356.73
  episode_media: {}
  episode_reward_max: 5.530000000000005
  episode_reward_mean: -3.4083999999999666
  episode_reward_min: -9.809999999999963
  episodes_this_iter: 4
  episodes_total: 841
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.517671322822571
          entropy_coeff: 0.009999999999999998
          kl: 0.014122943401950019
          policy_loss: -0.027784924623039033
          total_loss: 0.33420864186353155
          vf_explained_var: 0.5018125176429749
          vf_loss: 0.36494048257461853
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,305,7375.55,305000,-3.4084,5.53,-9.81,356.73


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-26_23-13-06
  done: false
  episode_len_mean: 351.27
  episode_media: {}
  episode_reward_max: 5.6700000000000275
  episode_reward_mean: -3.092699999999967
  episode_reward_min: -9.519999999999941
  episodes_this_iter: 4
  episodes_total: 845
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.6393849094708761
          entropy_coeff: 0.009999999999999998
          kl: 0.0171993155779395
          policy_loss: -0.0429238885641098
          total_loss: 0.9499171717299355
          vf_explained_var: 0.6941211819648743
          vf_loss: 0.9943411296440495
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,306,7408.24,306000,-3.0927,5.67,-9.52,351.27


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-26_23-13-36
  done: false
  episode_len_mean: 346.87
  episode_media: {}
  episode_reward_max: 5.6700000000000275
  episode_reward_mean: -3.0256999999999663
  episode_reward_min: -9.519999999999941
  episodes_this_iter: 3
  episodes_total: 848
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.6577290693918865
          entropy_coeff: 0.009999999999999998
          kl: 0.012392794389461514
          policy_loss: -0.05182886082265112
          total_loss: 1.0884597649176915
          vf_explained_var: 0.4481945335865021
          vf_loss: 1.146134360631307
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,307,7438.33,307000,-3.0257,5.67,-9.52,346.87




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-26_23-15-05
  done: false
  episode_len_mean: 333.38
  episode_media: {}
  episode_reward_max: 5.6700000000000275
  episode_reward_mean: -2.7198999999999676
  episode_reward_min: -9.519999999999941
  episodes_this_iter: 6
  episodes_total: 854
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.524781756930881
          entropy_coeff: 0.009999999999999998
          kl: 0.00876284837470773
          policy_loss: -0.060213596125443775
          total_loss: 0.3694758315467172
          vf_explained_var: 0.6546421647071838
          vf_loss: 0.4373490405579408
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,308,7527.33,308000,-2.7199,5.67,-9.52,333.38




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-26_23-16-48
  done: false
  episode_len_mean: 316.41
  episode_media: {}
  episode_reward_max: 5.6700000000000275
  episode_reward_mean: -2.4187999999999694
  episode_reward_min: -8.479999999999952
  episodes_this_iter: 6
  episodes_total: 860
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.5014404389593337
          entropy_coeff: 0.009999999999999998
          kl: 0.01438916499897885
          policy_loss: -0.00786817396680514
          total_loss: 1.0190998600588905
          vf_explained_var: 0.43308427929878235
          vf_loss: 1.0295220977730222
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,309,7629.94,309000,-2.4188,5.67,-8.48,316.41




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-26_23-19-24
  done: false
  episode_len_mean: 295.91
  episode_media: {}
  episode_reward_max: 5.6700000000000275
  episode_reward_mean: -1.963199999999973
  episode_reward_min: -10.49999999999995
  episodes_this_iter: 8
  episodes_total: 868
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.7602985779444376
          entropy_coeff: 0.009999999999999998
          kl: 0.012552140034904388
          policy_loss: 0.13646701922019322
          total_loss: 0.9084712713956833
          vf_explained_var: 0.5574313998222351
          vf_loss: 0.7787376791238785
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,310,7786.57,310000,-1.9632,5.67,-10.5,295.91




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-26_23-20-32
  done: false
  episode_len_mean: 287.14
  episode_media: {}
  episode_reward_max: 5.6700000000000275
  episode_reward_mean: -1.8035999999999734
  episode_reward_min: -11.399999999999954
  episodes_this_iter: 5
  episodes_total: 873
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.635550113519033
          entropy_coeff: 0.009999999999999998
          kl: 0.007795471351937246
          policy_loss: 0.06725499565816588
          total_loss: 0.9926906256212129
          vf_explained_var: 0.5127375721931458
          vf_loss: 0.9350406378507614
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,311,7854.66,311000,-1.8036,5.67,-11.4,287.14




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-26_23-21-43
  done: false
  episode_len_mean: 278.08
  episode_media: {}
  episode_reward_max: 5.6700000000000275
  episode_reward_mean: -1.622099999999974
  episode_reward_min: -11.399999999999954
  episodes_this_iter: 5
  episodes_total: 878
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.562542121940189
          entropy_coeff: 0.009999999999999998
          kl: 0.011262756202566138
          policy_loss: 0.046200666245487
          total_loss: 0.8416742321517733
          vf_explained_var: 0.6184872984886169
          vf_loss: 0.8013459649350908
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,312,7925.54,312000,-1.6221,5.67,-11.4,278.08


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-26_23-22-13
  done: false
  episode_len_mean: 277.26
  episode_media: {}
  episode_reward_max: 5.6700000000000275
  episode_reward_mean: -1.6299999999999744
  episode_reward_min: -11.399999999999954
  episodes_this_iter: 4
  episodes_total: 882
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.6745590209960937
          entropy_coeff: 0.009999999999999998
          kl: 0.009859013515385623
          policy_loss: -0.0608590135557784
          total_loss: 0.6879018487201797
          vf_explained_var: 0.4018242657184601
          vf_loss: 0.7569690161695083
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,313,7954.99,313000,-1.63,5.67,-11.4,277.26




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-26_23-23-53
  done: false
  episode_len_mean: 271.66
  episode_media: {}
  episode_reward_max: 5.6700000000000275
  episode_reward_mean: -1.4787999999999755
  episode_reward_min: -11.399999999999954
  episodes_this_iter: 4
  episodes_total: 886
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.9423490444819131
          entropy_coeff: 0.009999999999999998
          kl: 0.009599529089615227
          policy_loss: -0.04572170310550266
          total_loss: 0.35057620170215764
          vf_explained_var: 0.34275686740875244
          vf_loss: 0.4074086618092325
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 3140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,314,8054.84,314000,-1.4788,5.67,-11.4,271.66




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-26_23-25-05
  done: false
  episode_len_mean: 265.45
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.3234999999999764
  episode_reward_min: -11.399999999999954
  episodes_this_iter: 4
  episodes_total: 890
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.7277367326948379
          entropy_coeff: 0.009999999999999998
          kl: 0.016150402014468688
          policy_loss: -0.0207147516310215
          total_loss: 0.630356514453888
          vf_explained_var: 0.7180262207984924
          vf_loss: 0.654363154206011
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,315,8126.93,315000,-1.3235,8.23,-11.4,265.45


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-26_23-25-35
  done: false
  episode_len_mean: 260.03
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.303799999999977
  episode_reward_min: -11.399999999999954
  episodes_this_iter: 4
  episodes_total: 894
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.9678904983732435
          entropy_coeff: 0.009999999999999998
          kl: 0.017835923082631825
          policy_loss: 0.06409372107850181
          total_loss: 0.834533616900444
          vf_explained_var: 0.3068963885307312
          vf_loss: 0.7746737440427144
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,316,8156.89,316000,-1.3038,8.23,-11.4,260.03




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-26_23-26-43
  done: false
  episode_len_mean: 252.6
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.2887999999999773
  episode_reward_min: -14.34999999999998
  episodes_this_iter: 5
  episodes_total: 899
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 2.0209280305438573
          entropy_coeff: 0.009999999999999998
          kl: 0.007394002487694588
          policy_loss: -0.2106351002636883
          total_loss: 0.26002531413816743
          vf_explained_var: 0.5658034682273865
          vf_loss: 0.48446683941615953
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,317,8225.67,317000,-1.2888,8.23,-14.35,252.6




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-26_23-28-44
  done: false
  episode_len_mean: 237.55
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.191299999999979
  episode_reward_min: -14.34999999999998
  episodes_this_iter: 6
  episodes_total: 905
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8659522734582428
          cur_lr: 5.000000000000001e-05
          entropy: 1.7260056853294372
          entropy_coeff: 0.009999999999999998
          kl: 0.021938684452900623
          policy_loss: 0.11025371799866358
          total_loss: 1.6850134485297732
          vf_explained_var: 0.5399739742279053
          vf_loss: 1.573021951980061
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,318,8346.1,318000,-1.1913,8.23,-14.35,237.55


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-26_23-29-15
  done: false
  episode_len_mean: 234.02
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.1509999999999792
  episode_reward_min: -14.34999999999998
  episodes_this_iter: 3
  episodes_total: 908
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.915290449725257
          entropy_coeff: 0.009999999999999998
          kl: 0.011515005193097573
          policy_loss: 0.03390086396700806
          total_loss: 1.0153373209138712
          vf_explained_var: 0.4146987795829773
          vf_loss: 0.9856322026915021
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,319,8377.6,319000,-1.151,8.23,-14.35,234.02




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-26_23-30-59
  done: false
  episode_len_mean: 224.57
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.0313999999999799
  episode_reward_min: -14.34999999999998
  episodes_this_iter: 6
  episodes_total: 914
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.745900321006775
          entropy_coeff: 0.009999999999999998
          kl: 0.0113457283552814
          policy_loss: 0.029807388285795846
          total_loss: 0.9541744271914164
          vf_explained_var: 0.4578646421432495
          vf_loss: 0.927088760998514
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,320,8481.2,320000,-1.0314,8.23,-14.35,224.57


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-26_23-31-32
  done: false
  episode_len_mean: 222.1
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -0.95899999999998
  episode_reward_min: -14.34999999999998
  episodes_this_iter: 4
  episodes_total: 918
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.9016142037179735
          entropy_coeff: 0.009999999999999998
          kl: 0.005817505895396735
          policy_loss: -0.01833261458410157
          total_loss: 0.6841111601226859
          vf_explained_var: 0.4798163175582886
          vf_loss: 0.7139033940103319
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,321,8514.49,321000,-0.959,8.23,-14.35,222.1




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-26_23-33-10
  done: false
  episode_len_mean: 219.26
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -0.7645999999999805
  episode_reward_min: -14.34999999999998
  episodes_this_iter: 4
  episodes_total: 922
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.975496346420712
          entropy_coeff: 0.009999999999999998
          kl: 0.010209151243705734
          policy_loss: -0.11179977125591702
          total_loss: 0.7454488298959202
          vf_explained_var: 0.36232680082321167
          vf_loss: 0.8637426071696811
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,322,8612.53,322000,-0.7646,8.23,-14.35,219.26




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-26_23-34-53
  done: false
  episode_len_mean: 216.62
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -0.6752999999999807
  episode_reward_min: -14.34999999999998
  episodes_this_iter: 5
  episodes_total: 927
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.9467139720916748
          entropy_coeff: 0.009999999999999998
          kl: 0.007680057893747892
          policy_loss: -0.07311358758144909
          total_loss: 0.5101654244793786
          vf_explained_var: 0.23416048288345337
          vf_loss: 0.5927702991498841
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,323,8714.61,323000,-0.6753,8.23,-14.35,216.62


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-26_23-35-23
  done: false
  episode_len_mean: 216.24
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -0.8559999999999803
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 3
  episodes_total: 930
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.937814512517717
          entropy_coeff: 0.009999999999999998
          kl: 0.011601897051466123
          policy_loss: -0.008710888193713294
          total_loss: 0.6176155045628547
          vf_explained_var: 0.5242556929588318
          vf_loss: 0.6306345005830128
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,324,8744.96,324000,-0.856,8.23,-14.54,216.24




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-26_23-36-10
  done: false
  episode_len_mean: 217.34
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -0.8076999999999802
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 4
  episodes_total: 934
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 2.154096500078837
          entropy_coeff: 0.009999999999999998
          kl: 0.009115917694957634
          policy_loss: -0.10064737002054851
          total_loss: 0.3994911861916383
          vf_explained_var: 0.6671806573867798
          vf_loss: 0.5098386016156938
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,325,8791.57,325000,-0.8077,8.23,-14.54,217.34


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-26_23-36-39
  done: false
  episode_len_mean: 216.17
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -0.83839999999998
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 4
  episodes_total: 938
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8146284699440003
          entropy_coeff: 0.009999999999999998
          kl: 0.006998862030337517
          policy_loss: -0.09340387909776635
          total_loss: 0.7610313458575143
          vf_explained_var: 0.5119386911392212
          vf_loss: 0.8634904911120732
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,326,8821.35,326000,-0.8384,8.23,-14.54,216.17




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-26_23-37-50
  done: false
  episode_len_mean: 215.65
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -0.7827999999999798
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 4
  episodes_total: 942
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7351612501674227
          entropy_coeff: 0.009999999999999998
          kl: 0.008081156088031113
          policy_loss: -0.036660920745796625
          total_loss: 0.34547441572778753
          vf_explained_var: 0.828098475933075
          vf_loss: 0.38899010320504507
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 32700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,327,8891.9,327000,-0.7828,8.23,-14.54,215.65




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-26_23-38-56
  done: false
  episode_len_mean: 214.7
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.00279999999998
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 5
  episodes_total: 947
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 2.0509749386045666
          entropy_coeff: 0.009999999999999998
          kl: 0.0074542789384505126
          policy_loss: 0.009323303070333269
          total_loss: 0.5510249165818095
          vf_explained_var: 0.5335423946380615
          vf_loss: 0.5525287934475475
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,328,8958.4,328000,-1.0028,8.23,-14.54,214.7




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-26_23-40-07
  done: false
  episode_len_mean: 212.92
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.0016999999999798
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 4
  episodes_total: 951
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.8260688794983757
          entropy_coeff: 0.009999999999999998
          kl: 0.00540293078492854
          policy_loss: -0.06122268819146686
          total_loss: 0.5768072531869014
          vf_explained_var: 0.6662530303001404
          vf_loss: 0.6492726150486204
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,329,9029.4,329000,-1.0017,8.23,-14.54,212.92


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-26_23-40-35
  done: false
  episode_len_mean: 217.88
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.495399999999979
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 4
  episodes_total: 955
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 2.2195588005913627
          entropy_coeff: 0.009999999999999998
          kl: 0.011522818712168379
          policy_loss: 0.04900864755941762
          total_loss: 0.3945848715802034
          vf_explained_var: 0.853367805480957
          vf_loss: 0.3528044874469439
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,330,9056.65,330000,-1.4954,8.23,-14.54,217.88




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-26_23-42-07
  done: false
  episode_len_mean: 222.17
  episode_media: {}
  episode_reward_max: 8.230000000000006
  episode_reward_mean: -1.4252999999999794
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 4
  episodes_total: 959
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 2.0436540100309584
          entropy_coeff: 0.009999999999999998
          kl: 0.011709143817145279
          policy_loss: 0.15002288379602963
          total_loss: 0.4591095358961158
          vf_explained_var: 0.43372759222984314
          vf_loss: 0.3143138537804286
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,331,9148.46,331000,-1.4253,8.23,-14.54,222.17




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-26_23-42-52
  done: false
  episode_len_mean: 227.25
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.5012999999999785
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 4
  episodes_total: 963
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 2.028140510453118
          entropy_coeff: 0.009999999999999998
          kl: 0.0052076740640790336
          policy_loss: 0.08467664569616318
          total_loss: 0.45701972511079575
          vf_explained_var: 0.4637264609336853
          vf_loss: 0.38586009177896713
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,332,9193.82,332000,-1.5013,9.73,-14.54,227.25


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-26_23-43-22
  done: false
  episode_len_mean: 229.9
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.4664999999999788
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 3
  episodes_total: 966
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.803689800368415
          entropy_coeff: 0.009999999999999998
          kl: 0.005158267124021501
          policy_loss: -0.11793016940355301
          total_loss: 0.30786538268956876
          vf_explained_var: 0.3943372964859009
          vf_loss: 0.43713223276038965
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,333,9224.09,333000,-1.4665,9.73,-14.54,229.9


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-26_23-43-47
  done: false
  episode_len_mean: 235.85
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.7011999999999776
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 3
  episodes_total: 969
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 2.0371588667233786
          entropy_coeff: 0.009999999999999998
          kl: 0.006565731734337849
          policy_loss: -0.01466605994436476
          total_loss: 0.15767426838477452
          vf_explained_var: 0.5131674408912659
          vf_loss: 0.18418349909285706
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,334,9249.05,334000,-1.7012,9.73,-14.54,235.85




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-26_23-45-17
  done: false
  episode_len_mean: 236.21
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.579899999999978
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 5
  episodes_total: 974
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.7407245874404906
          entropy_coeff: 0.009999999999999998
          kl: 0.007114191271779191
          policy_loss: -0.005977618197600047
          total_loss: 0.4185788427790006
          vf_explained_var: 0.5591722130775452
          vf_loss: 0.4327228812707795
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,335,9338.75,335000,-1.5799,9.73,-14.54,236.21




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-26_23-46-21
  done: false
  episode_len_mean: 239.9
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.6047999999999774
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 4
  episodes_total: 978
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.9512796521186828
          entropy_coeff: 0.009999999999999998
          kl: 0.009840421982512934
          policy_loss: -0.08661160990595818
          total_loss: 0.42932170478420123
          vf_explained_var: 0.4972379803657532
          vf_loss: 0.5226641014218331
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,336,9403.05,336000,-1.6048,9.73,-14.54,239.9




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-26_23-47-27
  done: false
  episode_len_mean: 235.59
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.3378999999999777
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 5
  episodes_total: 983
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2989284101873633
          cur_lr: 5.000000000000001e-05
          entropy: 1.707156562142902
          entropy_coeff: 0.009999999999999998
          kl: 0.2527968056703948
          policy_loss: 0.07263055267847246
          total_loss: 3.889904413703415
          vf_explained_var: 0.5316384434700012
          vf_loss: 3.5059805197848215
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,337,9468.58,337000,-1.3379,9.73,-14.54,235.59




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-26_23-48-15
  done: false
  episode_len_mean: 237.15
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.4151999999999771
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 4
  episodes_total: 987
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9483926152810453
          cur_lr: 5.000000000000001e-05
          entropy: 2.1032298114564685
          entropy_coeff: 0.009999999999999998
          kl: 0.005976281113837937
          policy_loss: -0.07186758551332685
          total_loss: 1.3119402491384082
          vf_explained_var: 0.5207214951515198
          vf_loss: 1.3931960145632425
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,338,9517.17,338000,-1.4152,9.73,-14.54,237.15




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-26_23-50-25
  done: false
  episode_len_mean: 231.84
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.2695999999999774
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 6
  episodes_total: 993
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9483926152810453
          cur_lr: 5.000000000000001e-05
          entropy: 1.7142014728652106
          entropy_coeff: 0.009999999999999998
          kl: 0.0032459680464938068
          policy_loss: -0.0655757853968276
          total_loss: 0.9995018366310332
          vf_explained_var: 0.6139447093009949
          vf_loss: 1.0758952167299058
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,339,9646.97,339000,-1.2696,9.73,-14.54,231.84




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-26_23-51-11
  done: false
  episode_len_mean: 231.81
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.2236999999999776
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 5
  episodes_total: 998
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 2.0866611586676704
          entropy_coeff: 0.009999999999999998
          kl: 0.006402484304872639
          policy_loss: -0.1006189270151986
          total_loss: 0.23927283564375507
          vf_explained_var: 0.5846540331840515
          vf_loss: 0.35452109562853973
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,340,9692.83,340000,-1.2237,9.73,-14.54,231.81


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-26_23-51-41
  done: false
  episode_len_mean: 234.43
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.1701999999999773
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 3
  episodes_total: 1001
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 2.1712869577937655
          entropy_coeff: 0.009999999999999998
          kl: 0.004809450614928422
          policy_loss: -0.020479716940058602
          total_loss: 0.40103730029529994
          vf_explained_var: 0.36582618951797485
          vf_loss: 0.43854453497462803
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,341,9722.28,341000,-1.1702,9.73,-14.54,234.43




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-26_23-53-27
  done: false
  episode_len_mean: 235.92
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.1254999999999769
  episode_reward_min: -14.539999999999953
  episodes_this_iter: 5
  episodes_total: 1006
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 2.0527365552054513
          entropy_coeff: 0.009999999999999998
          kl: 0.008283909532738153
          policy_loss: -0.11435017660260201
          total_loss: 0.8036831542849541
          vf_explained_var: 0.5304070711135864
          vf_loss: 0.9345256136523352
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,342,9828.59,342000,-1.1255,9.73,-14.54,235.92


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-26_23-53-57
  done: false
  episode_len_mean: 236.08
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.286099999999977
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 4
  episodes_total: 1010
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 2.244655179977417
          entropy_coeff: 0.009999999999999998
          kl: 0.01479466013037761
          policy_loss: 0.13789881906575627
          total_loss: 0.7785399584306611
          vf_explained_var: 0.40157973766326904
          vf_loss: 0.655881238480409
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,343,9858.99,343000,-1.2861,9.73,-16.39,236.08




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-26_23-55-07
  done: false
  episode_len_mean: 239.83
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.466499999999976
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 4
  episodes_total: 1014
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 2.013688189453549
          entropy_coeff: 0.009999999999999998
          kl: 0.010249406247323624
          policy_loss: 0.1509212685541974
          total_loss: 0.691890052623219
          vf_explained_var: 0.46875661611557007
          vf_loss: 0.55611319343249
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,344,9928.82,344000,-1.4665,9.73,-16.39,239.83




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-26_23-56-30
  done: false
  episode_len_mean: 237.78
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.3359999999999765
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 5
  episodes_total: 1019
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.7713928818702698
          entropy_coeff: 0.009999999999999998
          kl: 0.007309952685708983
          policy_loss: 0.06304162591695786
          total_loss: 0.7506109452909894
          vf_explained_var: 0.267826110124588
          vf_loss: 0.7017225825124317
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,345,10012,345000,-1.336,9.73,-16.39,237.78




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-26_23-57-44
  done: false
  episode_len_mean: 237.88
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.353199999999976
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 4
  episodes_total: 1023
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 2.035777265495724
          entropy_coeff: 0.009999999999999998
          kl: 0.009221800843921418
          policy_loss: 0.04864893613590134
          total_loss: 0.45488103582627243
          vf_explained_var: 0.5580583214759827
          vf_loss: 0.4220979475312763
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,346,10085.3,346000,-1.3532,9.73,-16.39,237.88




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-26_23-58-34
  done: false
  episode_len_mean: 239.95
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.6569999999999754
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 4
  episodes_total: 1027
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.8785767369800144
          entropy_coeff: 0.009999999999999998
          kl: 0.008939603680215472
          policy_loss: 0.10731325894594193
          total_loss: 0.8297263655397628
          vf_explained_var: 0.4362512230873108
          vf_loss: 0.7368444141414431
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,347,10135.4,347000,-1.657,9.73,-16.39,239.95




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-26_23-59-22
  done: false
  episode_len_mean: 239.61
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.3697999999999757
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 4
  episodes_total: 1031
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 2.3068468146853975
          entropy_coeff: 0.009999999999999998
          kl: 0.008920075256476749
          policy_loss: -0.029156540003087785
          total_loss: 1.352534790833791
          vf_explained_var: 0.4187495708465576
          vf_loss: 1.4004148456785415
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,348,10183.8,348000,-1.3698,9.73,-16.39,239.61




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-27_00-01-26
  done: false
  episode_len_mean: 232.69
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -1.0091999999999772
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 6
  episodes_total: 1037
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.9736855586369833
          entropy_coeff: 0.009999999999999998
          kl: 0.00852935121266797
          policy_loss: 0.06806655600667
          total_loss: 0.918837759229872
          vf_explained_var: 0.7705672979354858
          vf_loss: 0.8663534339931276
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,349,10307.2,349000,-1.0092,9.73,-16.39,232.69




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-27_00-03-44
  done: false
  episode_len_mean: 228.65
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -0.9764999999999776
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 7
  episodes_total: 1044
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.8879480679829916
          entropy_coeff: 0.009999999999999998
          kl: 0.008701938161318385
          policy_loss: 0.002626002248790529
          total_loss: 0.7121127016014523
          vf_explained_var: 0.7987840175628662
          vf_loss: 0.7241274827056461
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,350,10445.3,350000,-0.9765,9.73,-16.39,228.65




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-27_00-06-21
  done: false
  episode_len_mean: 221.44
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -0.5887999999999797
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 7
  episodes_total: 1051
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.5082326862547133
          entropy_coeff: 0.009999999999999998
          kl: 0.01160682262877574
          policy_loss: 0.17219979365666707
          total_loss: 1.2473273525635402
          vf_explained_var: 0.6753358244895935
          vf_loss: 1.0845562120278676
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,351,10602.1,351000,-0.5888,9.73,-16.39,221.44




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-27_00-07-20
  done: false
  episode_len_mean: 220.53
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: -0.31759999999997973
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 3
  episodes_total: 1054
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 2.2171294702423943
          entropy_coeff: 0.009999999999999998
          kl: 0.008158923901371098
          policy_loss: -0.018609880738788182
          total_loss: 0.8534522914224201
          vf_explained_var: 0.6966750025749207
          vf_loss: 0.8902592758337656
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,352,10661.2,352000,-0.3176,9.73,-16.39,220.53




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-27_00-09-38
  done: false
  episode_len_mean: 213.11
  episode_media: {}
  episode_reward_max: 9.73
  episode_reward_mean: 0.08330000000001996
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 7
  episodes_total: 1061
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.552895008193122
          entropy_coeff: 0.009999999999999998
          kl: 0.007886562779589844
          policy_loss: 0.11250063363048765
          total_loss: 0.806514526075787
          vf_explained_var: 0.7123358249664307
          vf_loss: 0.7057013144095738
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,353,10799,353000,0.0833,9.73,-16.39,213.11




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-27_00-10-48
  done: false
  episode_len_mean: 211.85
  episode_media: {}
  episode_reward_max: 9.560000000000006
  episode_reward_mean: -0.04409999999998004
  episode_reward_min: -16.38999999999998
  episodes_this_iter: 5
  episodes_total: 1066
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.6305907726287843
          entropy_coeff: 0.009999999999999998
          kl: 0.00786967824930527
          policy_loss: -0.0598733032329215
          total_loss: 0.7187519624829293
          vf_explained_var: 0.645844578742981
          vf_loss: 0.79109787940979
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,354,10869.7,354000,-0.0441,9.56,-16.39,211.85




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-27_00-12-47
  done: false
  episode_len_mean: 205.04
  episode_media: {}
  episode_reward_max: 9.560000000000006
  episode_reward_mean: 0.15670000000001874
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 6
  episodes_total: 1072
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.883077159192827
          entropy_coeff: 0.009999999999999998
          kl: 0.011157092502352603
          policy_loss: -0.13733291344510185
          total_loss: 0.9656878021028307
          vf_explained_var: 0.6382338404655457
          vf_loss: 1.1164168887668187
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,355,10988.6,355000,0.1567,9.56,-18.76,205.04


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-27_00-13-16
  done: false
  episode_len_mean: 205.36
  episode_media: {}
  episode_reward_max: 9.560000000000006
  episode_reward_mean: -0.004099999999981225
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 3
  episodes_total: 1075
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 2.130474030971527
          entropy_coeff: 0.009999999999999998
          kl: 0.010387345173263639
          policy_loss: -0.05827204262216886
          total_loss: 0.4890740155345864
          vf_explained_var: 0.6024348139762878
          vf_loss: 0.5635911342170503
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 3560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,356,11017.2,356000,-0.0041,9.56,-18.76,205.36




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-27_00-14-03
  done: false
  episode_len_mean: 205.82
  episode_media: {}
  episode_reward_max: 9.560000000000006
  episode_reward_mean: 0.023700000000018803
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 5
  episodes_total: 1080
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.9331369890107049
          entropy_coeff: 0.009999999999999998
          kl: 0.0073957822012925825
          policy_loss: -0.0371540749238597
          total_loss: 0.6233169436454773
          vf_explained_var: 0.7533852458000183
          vf_loss: 0.6761999216344622
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 3570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,357,11064.1,357000,0.0237,9.56,-18.76,205.82




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-27_00-15-07
  done: false
  episode_len_mean: 206.36
  episode_media: {}
  episode_reward_max: 9.560000000000006
  episode_reward_mean: -0.06299999999998171
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 5
  episodes_total: 1085
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.8865238388379415
          entropy_coeff: 0.009999999999999998
          kl: 0.0059301625651461895
          policy_loss: -0.013266207029422125
          total_loss: 0.27983966320753095
          vf_explained_var: 0.9045630693435669
          vf_loss: 0.3090825361510118
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,358,11127.9,358000,-0.063,9.56,-18.76,206.36




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-27_00-16-00
  done: false
  episode_len_mean: 205.58
  episode_media: {}
  episode_reward_max: 9.560000000000006
  episode_reward_mean: 0.04100000000001804
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 4
  episodes_total: 1089
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.9155871735678778
          entropy_coeff: 0.009999999999999998
          kl: 0.0063766103037360975
          policy_loss: 0.025566605064604016
          total_loss: 0.45766020702819027
          vf_explained_var: 0.786939263343811
          vf_loss: 0.44814343402783074
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,359,11181.2,359000,0.041,9.56,-18.76,205.58




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-27_00-18-08
  done: false
  episode_len_mean: 206.52
  episode_media: {}
  episode_reward_max: 9.560000000000006
  episode_reward_mean: 0.056400000000018886
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 6
  episodes_total: 1095
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.9168499204847547
          entropy_coeff: 0.009999999999999998
          kl: 0.012195524385387478
          policy_loss: -0.07202029960850874
          total_loss: 0.5670050617721346
          vf_explained_var: 0.8275784850120544
          vf_loss: 0.6522534449895223
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 3600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,360,11309.2,360000,0.0564,9.56,-18.76,206.52




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-27_00-19-32
  done: false
  episode_len_mean: 204.19
  episode_media: {}
  episode_reward_max: 9.560000000000006
  episode_reward_mean: 0.2849000000000188
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 5
  episodes_total: 1100
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 2.0333522809876334
          entropy_coeff: 0.009999999999999998
          kl: 0.010288722676999552
          policy_loss: -0.15757371307247214
          total_loss: 0.15425540047387282
          vf_explained_var: 0.9347923994064331
          vf_loss: 0.3271510172221396
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 36100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,361,11393.1,361000,0.2849,9.56,-18.76,204.19




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-27_00-21-33
  done: false
  episode_len_mean: 198.18
  episode_media: {}
  episode_reward_max: 9.730000000000002
  episode_reward_mean: 0.7993000000000179
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 7
  episodes_total: 1107
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.7651997221840752
          entropy_coeff: 0.009999999999999998
          kl: 0.008406074343985262
          policy_loss: -0.03764676849047343
          total_loss: 0.714748902618885
          vf_explained_var: 0.7388554811477661
          vf_loss: 0.7659530849092536
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,362,11514.3,362000,0.7993,9.73,-18.76,198.18




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-27_00-22-20
  done: false
  episode_len_mean: 197.52
  episode_media: {}
  episode_reward_max: 9.730000000000002
  episode_reward_mean: 1.1087000000000176
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 4
  episodes_total: 1111
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 2.020669662952423
          entropy_coeff: 0.009999999999999998
          kl: 0.008057743210778125
          policy_loss: -0.008646600445111592
          total_loss: 0.252763316863113
          vf_explained_var: 0.7694981098175049
          vf_loss: 0.2776917027102576
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,363,11561.2,363000,1.1087,9.73,-18.76,197.52




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-27_00-23-54
  done: false
  episode_len_mean: 195.32
  episode_media: {}
  episode_reward_max: 9.730000000000002
  episode_reward_mean: 1.262200000000017
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 4
  episodes_total: 1115
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.5907949149608611
          entropy_coeff: 0.009999999999999998
          kl: 0.14748315985273877
          policy_loss: 0.08996671628620889
          total_loss: 1.6884722711311446
          vf_explained_var: 0.32677987217903137
          vf_loss: 1.5425747129652234
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,364,11655.6,364000,1.2622,9.73,-18.76,195.32




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-27_00-25-12
  done: false
  episode_len_mean: 195.04
  episode_media: {}
  episode_reward_max: 9.730000000000002
  episode_reward_mean: 1.2967000000000177
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 6
  episodes_total: 1121
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9998515393998888
          entropy_coeff: 0.009999999999999998
          kl: 0.006852155155977815
          policy_loss: 0.059519175026151866
          total_loss: 0.41579549958308537
          vf_explained_var: 0.022624628618359566
          vf_loss: 0.37126833079382776
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,365,11732.7,365000,1.2967,9.73,-18.76,195.04




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-27_00-26-17
  done: false
  episode_len_mean: 196.91
  episode_media: {}
  episode_reward_max: 9.730000000000002
  episode_reward_mean: 1.1874000000000178
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 3
  episodes_total: 1124
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9258229931195576
          entropy_coeff: 0.009999999999999998
          kl: 0.007801424970945314
          policy_loss: -0.04984976293312179
          total_loss: 0.4994262651436859
          vf_explained_var: 0.7772963643074036
          vf_loss: 0.5628341691361534
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,366,11798.4,366000,1.1874,9.73,-18.76,196.91




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-27_00-27-42
  done: false
  episode_len_mean: 194.26
  episode_media: {}
  episode_reward_max: 9.730000000000002
  episode_reward_mean: 1.6535000000000164
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 5
  episodes_total: 1129
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9502778344684177
          entropy_coeff: 0.009999999999999998
          kl: 0.00609173311520394
          policy_loss: 0.021336968160337873
          total_loss: 0.41124137971136304
          vf_explained_var: 0.698144257068634
          vf_loss: 0.40495627754264407
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,367,11882.7,367000,1.6535,9.73,-18.76,194.26


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-27_00-28-03
  done: false
  episode_len_mean: 197.41
  episode_media: {}
  episode_reward_max: 9.730000000000002
  episode_reward_mean: 1.4291000000000167
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 2
  episodes_total: 1131
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 2.1697054915957983
          entropy_coeff: 0.009999999999999998
          kl: 0.006138069921719315
          policy_loss: -0.11613123897049162
          total_loss: -0.016948493321736653
          vf_explained_var: 0.26829203963279724
          vf_loss: 0.11639503840770986
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 36

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,368,11904.6,368000,1.4291,9.73,-18.76,197.41




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-27_00-29-29
  done: false
  episode_len_mean: 201.0
  episode_media: {}
  episode_reward_max: 9.730000000000002
  episode_reward_mean: 1.1987000000000172
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 5
  episodes_total: 1136
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7306472307303924
          cur_lr: 5.000000000000001e-05
          entropy: 1.9581111166212293
          entropy_coeff: 0.009999999999999998
          kl: 0.050166494493445964
          policy_loss: 0.02582622567812602
          total_loss: 1.4907393997328149
          vf_explained_var: 0.5641844868659973
          vf_loss: 1.44784027867847
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,369,11990.4,369000,1.1987,9.73,-18.76,201




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-27_00-30-06
  done: false
  episode_len_mean: 203.79
  episode_media: {}
  episode_reward_max: 9.730000000000002
  episode_reward_mean: 1.2189000000000174
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 2
  episodes_total: 1138
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0959708460955877
          cur_lr: 5.000000000000001e-05
          entropy: 2.0636241051885817
          entropy_coeff: 0.009999999999999998
          kl: 0.003949121268386099
          policy_loss: -0.13319630862938034
          total_loss: 0.04171207509934902
          vf_explained_var: 0.8473771214485168
          vf_loss: 0.19121650137628118
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 37000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,370,12027,370000,1.2189,9.73,-18.76,203.79




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-27_00-32-41
  done: false
  episode_len_mean: 206.14
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: 1.363700000000018
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 8
  episodes_total: 1146
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5479854230477939
          cur_lr: 5.000000000000001e-05
          entropy: 1.9455869886610242
          entropy_coeff: 0.009999999999999998
          kl: 0.021018849167887942
          policy_loss: -0.003312432434823778
          total_loss: 1.1259029908312692
          vf_explained_var: 0.6482546329498291
          vf_loss: 1.1371532764699723
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,371,12181.8,371000,1.3637,9.83,-18.76,206.14


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-27_00-33-03
  done: false
  episode_len_mean: 211.41
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: 1.3126000000000193
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 3
  episodes_total: 1149
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 2.1951043592558968
          entropy_coeff: 0.009999999999999998
          kl: 0.013851890977163469
          policy_loss: 0.12290278209580316
          total_loss: 0.49388930607173176
          vf_explained_var: 0.5390939712524414
          vf_loss: 0.381551605131891
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,372,12204.2,372000,1.3126,9.83,-18.76,211.41




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-27_00-34-18
  done: false
  episode_len_mean: 214.02
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: 1.2908000000000197
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 4
  episodes_total: 1153
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 2.1350719703568353
          entropy_coeff: 0.009999999999999998
          kl: 0.0068793828860912575
          policy_loss: -0.10784191936254502
          total_loss: 0.1267677483873235
          vf_explained_var: 0.7667474746704102
          vf_loss: 0.2503056818826331
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,373,12279.2,373000,1.2908,9.83,-18.76,214.02




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-27_00-36-40
  done: false
  episode_len_mean: 210.12
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: 1.232800000000019
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 8
  episodes_total: 1161
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.6769669082429675
          entropy_coeff: 0.009999999999999998
          kl: 0.006080325573590371
          policy_loss: 0.033785982347197
          total_loss: 0.490731755644083
          vf_explained_var: 0.8257990479469299
          vf_loss: 0.46871754883064165
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,374,12421,374000,1.2328,9.83,-18.76,210.12




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-27_00-38-54
  done: false
  episode_len_mean: 207.89
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: 1.361300000000019
  episode_reward_min: -18.760000000000044
  episodes_this_iter: 7
  episodes_total: 1168
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.971689420276218
          entropy_coeff: 0.009999999999999998
          kl: 0.0070850354010282696
          policy_loss: -0.1477844557000531
          total_loss: 0.1103654888872471
          vf_explained_var: 0.9345457553863525
          vf_loss: 0.27204309296276835
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,375,12554.6,375000,1.3613,9.83,-18.76,207.89




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-27_00-40-10
  done: false
  episode_len_mean: 205.08
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: 1.5515000000000196
  episode_reward_min: -14.969999999999965
  episodes_this_iter: 5
  episodes_total: 1173
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 2.013730952474806
          entropy_coeff: 0.009999999999999998
          kl: 0.007401829182838782
          policy_loss: -0.03932809399233924
          total_loss: 0.15650649062461322
          vf_explained_var: 0.8372304439544678
          vf_loss: 0.20988775913914046
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,376,12631.1,376000,1.5515,9.83,-14.97,205.08




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-27_00-41-10
  done: false
  episode_len_mean: 204.28
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: 1.7442000000000197
  episode_reward_min: -9.79999999999994
  episodes_this_iter: 5
  episodes_total: 1178
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.9565551903512743
          entropy_coeff: 0.009999999999999998
          kl: 0.01020618780657152
          policy_loss: 0.07677355984018909
          total_loss: 0.6506828299827045
          vf_explained_var: 0.797304630279541
          vf_loss: 0.5850855501161681
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,377,12690.7,377000,1.7442,9.83,-9.8,204.28




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-27_00-45-01
  done: false
  episode_len_mean: 187.97
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 2.3144000000000178
  episode_reward_min: -9.79999999999994
  episodes_this_iter: 12
  episodes_total: 1190
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.6587570587793985
          entropy_coeff: 0.009999999999999998
          kl: 0.018573777972585265
          policy_loss: -0.022021969159444172
          total_loss: 0.8222092540727721
          vf_explained_var: 0.7283947467803955
          vf_loss: 0.8455515540670604
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,378,12922.3,378000,2.3144,9.86,-9.8,187.97


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-27_00-45-23
  done: false
  episode_len_mean: 192.89
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 2.083700000000019
  episode_reward_min: -9.79999999999994
  episodes_this_iter: 2
  episodes_total: 1192
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.996991397274865
          entropy_coeff: 0.009999999999999998
          kl: 0.014457419309241118
          policy_loss: 0.022914933496051365
          total_loss: 0.3962065984081063
          vf_explained_var: 0.35959485173225403
          vf_loss: 0.38137789748660805
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,379,12943.9,379000,2.0837,9.86,-9.8,192.89


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-27_00-45-43
  done: false
  episode_len_mean: 198.86
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.9192000000000191
  episode_reward_min: -9.79999999999994
  episodes_this_iter: 2
  episodes_total: 1194
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.821978134571691
          cur_lr: 5.000000000000001e-05
          entropy: 1.9653082225057814
          entropy_coeff: 0.009999999999999998
          kl: 0.0009192907155766545
          policy_loss: -0.11004253447883659
          total_loss: -0.1138974939679934
          vf_explained_var: 0.7748944759368896
          vf_loss: 0.01504248474828071
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,380,12964.2,380000,1.9192,9.86,-9.8,198.86




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-27_00-47-55
  done: false
  episode_len_mean: 194.88
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 2.091600000000019
  episode_reward_min: -9.79999999999994
  episodes_this_iter: 7
  episodes_total: 1201
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4109890672858455
          cur_lr: 5.000000000000001e-05
          entropy: 1.7596417162153455
          entropy_coeff: 0.009999999999999998
          kl: 0.010619277629648583
          policy_loss: -0.049928342468208736
          total_loss: 0.7447840458816952
          vf_explained_var: 0.816511869430542
          vf_loss: 0.807944396800465
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,381,13096.3,381000,2.0916,9.86,-9.8,194.88




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-27_00-49-14
  done: false
  episode_len_mean: 200.84
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.7341000000000202
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 6
  episodes_total: 1207
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4109890672858455
          cur_lr: 5.000000000000001e-05
          entropy: 2.0098904993798996
          entropy_coeff: 0.009999999999999998
          kl: 0.01352334095623533
          policy_loss: 0.07218827944662835
          total_loss: 0.5199244957831171
          vf_explained_var: 0.6836577653884888
          vf_loss: 0.46227717879745694
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,382,13174.8,382000,1.7341,9.86,-12.86,200.84




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-27_00-50-19
  done: false
  episode_len_mean: 198.56
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.8447000000000195
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 4
  episodes_total: 1211
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4109890672858455
          cur_lr: 5.000000000000001e-05
          entropy: 1.861130776670244
          entropy_coeff: 0.009999999999999998
          kl: 0.008692950044939775
          policy_loss: -0.15029064350657992
          total_loss: 0.15162170343101025
          vf_explained_var: 0.788425624370575
          vf_loss: 0.31695094514224265
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,383,13239.3,383000,1.8447,9.86,-12.86,198.56




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-27_00-51-33
  done: false
  episode_len_mean: 199.27
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.7497000000000202
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 5
  episodes_total: 1216
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4109890672858455
          cur_lr: 5.000000000000001e-05
          entropy: 1.921075259314643
          entropy_coeff: 0.009999999999999998
          kl: 0.004623701581842522
          policy_loss: 0.07013576709561878
          total_loss: 0.07666538713706864
          vf_explained_var: 0.8793509602546692
          vf_loss: 0.02384008173313406
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,384,13313.5,384000,1.7497,9.86,-12.86,199.27




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-27_00-52-50
  done: false
  episode_len_mean: 200.18
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.8209000000000204
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 5
  episodes_total: 1221
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20549453364292275
          cur_lr: 5.000000000000001e-05
          entropy: 2.023834150367313
          entropy_coeff: 0.009999999999999998
          kl: 0.012407590451873713
          policy_loss: 0.022918506132231817
          total_loss: 0.2887178053458532
          vf_explained_var: 0.7494115233421326
          vf_loss: 0.2834879520866606
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,385,13390.5,385000,1.8209,9.86,-12.86,200.18




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-27_00-54-24
  done: false
  episode_len_mean: 194.25
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 2.0128000000000204
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 5
  episodes_total: 1226
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20549453364292275
          cur_lr: 5.000000000000001e-05
          entropy: 1.9458958122465346
          entropy_coeff: 0.009999999999999998
          kl: 0.012569870424596404
          policy_loss: -0.08711696738998095
          total_loss: 0.0860191529409753
          vf_explained_var: 0.6864401698112488
          vf_loss: 0.190012039616704
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,386,13484.4,386000,2.0128,9.86,-12.86,194.25




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-27_00-55-25
  done: false
  episode_len_mean: 192.04
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.9831000000000198
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 5
  episodes_total: 1231
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20549453364292275
          cur_lr: 5.000000000000001e-05
          entropy: 2.0735574298434787
          entropy_coeff: 0.009999999999999998
          kl: 0.004293940626434198
          policy_loss: -0.10091994860106045
          total_loss: -0.0344233451411128
          vf_explained_var: 0.650871217250824
          vf_loss: 0.08634979770415359
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,387,13546,387000,1.9831,9.86,-12.86,192.04


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-27_00-55-47
  done: false
  episode_len_mean: 194.77
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.9563000000000208
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 2
  episodes_total: 1233
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10274726682146137
          cur_lr: 5.000000000000001e-05
          entropy: 2.176789395014445
          entropy_coeff: 0.009999999999999998
          kl: 0.020340510935972313
          policy_loss: -0.13432316614521875
          total_loss: -0.08188497399290402
          vf_explained_var: 0.5255212783813477
          vf_loss: 0.0721161533664498
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,388,13567.4,388000,1.9563,9.86,-12.86,194.77




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-27_00-57-39
  done: false
  episode_len_mean: 188.9
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.948600000000021
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 7
  episodes_total: 1240
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1541209002321921
          cur_lr: 5.000000000000001e-05
          entropy: 2.2635911093817818
          entropy_coeff: 0.009999999999999998
          kl: 0.015047031510844737
          policy_loss: 0.0647764036224948
          total_loss: 0.30231782477349045
          vf_explained_var: 0.7756863236427307
          vf_loss: 0.25785827504263986
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,389,13679.9,389000,1.9486,9.86,-12.86,188.9




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-27_00-58-58
  done: false
  episode_len_mean: 195.04
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.5893000000000217
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 5
  episodes_total: 1245
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1541209002321921
          cur_lr: 5.000000000000001e-05
          entropy: 2.0981694724824695
          entropy_coeff: 0.009999999999999998
          kl: 0.010358994279239456
          policy_loss: -0.08338242015904851
          total_loss: 0.08321990430768993
          vf_explained_var: 0.8344036340713501
          vf_loss: 0.1859874843309323
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,390,13758.2,390000,1.5893,9.86,-12.86,195.04


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-27_00-59-19
  done: false
  episode_len_mean: 194.88
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.650300000000021
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 2
  episodes_total: 1247
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1541209002321921
          cur_lr: 5.000000000000001e-05
          entropy: 2.013722186618381
          entropy_coeff: 0.009999999999999998
          kl: 0.02176590602222561
          policy_loss: 0.07863594690958658
          total_loss: 0.16228707664542727
          vf_explained_var: 0.8194979429244995
          vf_loss: 0.1004337639366794
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,391,13779.6,391000,1.6503,9.86,-12.86,194.88




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-27_01-00-16
  done: false
  episode_len_mean: 195.92
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.5856000000000217
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 4
  episodes_total: 1251
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23118135034828802
          cur_lr: 5.000000000000001e-05
          entropy: 2.117801793416341
          entropy_coeff: 0.009999999999999998
          kl: 0.009681639927610498
          policy_loss: -0.07807080621520678
          total_loss: -0.02006881580584579
          vf_explained_var: 0.8988699913024902
          vf_loss: 0.07694179440538089
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 39200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,392,13836.3,392000,1.5856,9.86,-12.86,195.92




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-27_01-01-32
  done: false
  episode_len_mean: 195.79
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.4793000000000216
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 5
  episodes_total: 1256
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23118135034828802
          cur_lr: 5.000000000000001e-05
          entropy: 2.119832396507263
          entropy_coeff: 0.009999999999999998
          kl: 0.008469051258617159
          policy_loss: 0.11046589588125547
          total_loss: 0.332504906753699
          vf_explained_var: 0.8841779232025146
          vf_loss: 0.24127944965536396
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,393,13912.8,393000,1.4793,9.86,-12.86,195.79




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-27_01-03-29
  done: false
  episode_len_mean: 196.22
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.464900000000022
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 7
  episodes_total: 1263
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23118135034828802
          cur_lr: 5.000000000000001e-05
          entropy: 2.079264461994171
          entropy_coeff: 0.009999999999999998
          kl: 0.021897620734747075
          policy_loss: 0.00025123018357488844
          total_loss: 0.31596063102285066
          vf_explained_var: 0.8338954448699951
          vf_loss: 0.3314397215274059
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,394,14029.3,394000,1.4649,9.86,-12.86,196.22




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-27_01-04-28
  done: false
  episode_len_mean: 201.58
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.3332000000000235
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 4
  episodes_total: 1267
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.1405125167634753
          entropy_coeff: 0.009999999999999998
          kl: 0.010176094826133072
          policy_loss: 0.062175212303797404
          total_loss: 0.17814002446830274
          vf_explained_var: 0.2674022912979126
          vf_loss: 0.1338411473597969
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,395,14088.8,395000,1.3332,9.86,-12.86,201.58




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-27_01-06-16
  done: false
  episode_len_mean: 198.27
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.391100000000023
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 7
  episodes_total: 1274
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.2088621881273056
          entropy_coeff: 0.009999999999999998
          kl: 0.010905067295462473
          policy_loss: 0.032387107610702515
          total_loss: 0.5361668863437242
          vf_explained_var: 0.7248468399047852
          vf_loss: 0.5220868383844693
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,396,14196.1,396000,1.3911,9.86,-12.86,198.27




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-27_01-07-33
  done: false
  episode_len_mean: 195.4
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.4018000000000228
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 5
  episodes_total: 1279
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.1338550157017178
          entropy_coeff: 0.009999999999999998
          kl: 0.007653149996684544
          policy_loss: -0.039298338194688164
          total_loss: -0.015339550541506873
          vf_explained_var: 0.6061784625053406
          vf_loss: 0.042643438398631085
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 39

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,397,14273.5,397000,1.4018,9.86,-12.86,195.4




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-27_01-08-10
  done: false
  episode_len_mean: 200.61
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.3498000000000234
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 2
  episodes_total: 1281
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.1565292252434625
          entropy_coeff: 0.009999999999999998
          kl: 0.013651272598091149
          policy_loss: -0.14522840281327565
          total_loss: -0.12132901350657145
          vf_explained_var: 0.9378144145011902
          vf_loss: 0.04073079861700535
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 3980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,398,14310.7,398000,1.3498,9.86,-12.86,200.61




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-27_01-10-28
  done: false
  episode_len_mean: 210.68
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 0.9777000000000249
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 9
  episodes_total: 1290
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 1.9233068333731758
          entropy_coeff: 0.009999999999999998
          kl: 0.012586879792825004
          policy_loss: -0.1827236059639189
          total_loss: -0.10108117270800802
          vf_explained_var: 0.9647679924964905
          vf_loss: 0.09651072043925524
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 39900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,399,14448,399000,0.9777,9.86,-12.86,210.68




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-27_01-11-05
  done: false
  episode_len_mean: 208.61
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.105800000000024
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 2
  episodes_total: 1292
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.146201929781172
          entropy_coeff: 0.009999999999999998
          kl: 0.011996042292092805
          policy_loss: -0.11547306329011917
          total_loss: -0.009525191121631199
          vf_explained_var: 0.2245955765247345
          vf_loss: 0.1232499952091732
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,400,14485.7,400000,1.1058,9.86,-12.86,208.61




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-27_01-11-58
  done: false
  episode_len_mean: 205.43
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.266200000000024
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 4
  episodes_total: 1296
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.19951974550883
          entropy_coeff: 0.009999999999999998
          kl: 0.011329129546406413
          policy_loss: -0.09213979459471172
          total_loss: -0.07906255713767475
          vf_explained_var: 0.30547821521759033
          vf_loss: 0.03114381314177687
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,401,14538.6,401000,1.2662,9.86,-12.86,205.43




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-27_01-13-24
  done: false
  episode_len_mean: 211.76
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 1.0298000000000243
  episode_reward_min: -12.85999999999994
  episodes_this_iter: 5
  episodes_total: 1301
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.2212779680887857
          entropy_coeff: 0.009999999999999998
          kl: 0.005349287867215501
          policy_loss: -0.21148220383458669
          total_loss: -0.2028418968121211
          vf_explained_var: 0.9910354018211365
          vf_loss: 0.0289981026823322
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,402,14623.9,402000,1.0298,9.86,-12.86,211.76




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-27_01-15-43
  done: false
  episode_len_mean: 204.43
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.3063000000000236
  episode_reward_min: -11.579999999999961
  episodes_this_iter: 9
  episodes_total: 1310
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 1.9227021137873332
          entropy_coeff: 0.009999999999999998
          kl: 0.014652741551117721
          policy_loss: 0.060288728773593904
          total_loss: 0.2436336381567849
          vf_explained_var: 0.48375728726387024
          vf_loss: 0.19749077196336454
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,403,14763.7,403000,1.3063,9.91,-11.58,204.43




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-27_01-16-36
  done: false
  episode_len_mean: 197.79
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.5578000000000225
  episode_reward_min: -11.579999999999961
  episodes_this_iter: 3
  episodes_total: 1313
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.210082949532403
          entropy_coeff: 0.009999999999999998
          kl: 0.005863092748799876
          policy_loss: -0.053049469283885424
          total_loss: -0.004821958558426963
          vf_explained_var: 0.8130631446838379
          vf_loss: 0.06829518548005985
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,404,14816.7,404000,1.5578,9.91,-11.58,197.79




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-27_01-17-17
  done: false
  episode_len_mean: 205.3
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.3369000000000235
  episode_reward_min: -11.579999999999961
  episodes_this_iter: 4
  episodes_total: 1317
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.145328356160058
          entropy_coeff: 0.009999999999999998
          kl: 0.013385326684376529
          policy_loss: 0.03918486005730099
          total_loss: 0.2640971188743909
          vf_explained_var: 0.12121852487325668
          vf_loss: 0.2417238864245721
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,405,14857,405000,1.3369,9.91,-11.58,205.3




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-27_01-18-52
  done: false
  episode_len_mean: 206.1
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.3373000000000235
  episode_reward_min: -11.579999999999961
  episodes_this_iter: 5
  episodes_total: 1322
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.34677202552243225
          cur_lr: 5.000000000000001e-05
          entropy: 2.1537471810976663
          entropy_coeff: 0.009999999999999998
          kl: 0.16954025448935434
          policy_loss: 0.02069670938783222
          total_loss: 0.9070319446010722
          vf_explained_var: -0.22796662151813507
          vf_loss: 0.8490808950737119
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,406,14952,406000,1.3373,9.91,-11.58,206.1




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-27_01-20-09
  done: false
  episode_len_mean: 203.43
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.3239000000000225
  episode_reward_min: -11.579999999999961
  episodes_this_iter: 5
  episodes_total: 1327
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 2.173159186045329
          entropy_coeff: 0.009999999999999998
          kl: 0.02245752725727804
          policy_loss: -0.0018719711237483554
          total_loss: 0.5642087885075145
          vf_explained_var: 0.5127092599868774
          vf_loss: 0.576130892501937
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,407,15028.8,407000,1.3239,9.91,-11.58,203.43




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-27_01-23-57
  done: false
  episode_len_mean: 190.54
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.6776000000000215
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 13
  episodes_total: 1340
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 2.006038044558631
          entropy_coeff: 0.009999999999999998
          kl: 0.01107176255326094
          policy_loss: 0.10772333997819158
          total_loss: 0.4431628698276149
          vf_explained_var: 0.8279083371162415
          vf_loss: 0.346861313117875
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,408,15257.5,408000,1.6776,9.91,-12.82,190.54




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-27_01-24-58
  done: false
  episode_len_mean: 192.76
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.5745000000000218
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 4
  episodes_total: 1344
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 2.2781425926420424
          entropy_coeff: 0.009999999999999998
          kl: 0.009925334721999787
          policy_loss: -0.04341025814000103
          total_loss: 0.03024854047430886
          vf_explained_var: 0.6421990990638733
          vf_loss: 0.08869611099362373
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,409,15318.1,409000,1.5745,9.91,-12.82,192.76




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-27_01-25-37
  done: false
  episode_len_mean: 189.4
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.7227000000000212
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 3
  episodes_total: 1347
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 2.271378254890442
          entropy_coeff: 0.009999999999999998
          kl: 0.008236734370729516
          policy_loss: -0.05400952216651705
          total_loss: -0.04530945867300033
          vf_explained_var: 0.954103410243988
          vf_loss: 0.024987241253256798
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,410,15357.1,410000,1.7227,9.91,-12.82,189.4




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-27_01-26-49
  done: false
  episode_len_mean: 185.69
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.819100000000021
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 5
  episodes_total: 1352
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7802370574254722
          cur_lr: 5.000000000000001e-05
          entropy: 2.2162338164117603
          entropy_coeff: 0.009999999999999998
          kl: 0.0019244564728696783
          policy_loss: -0.10925478554434247
          total_loss: -0.0911489893992742
          vf_explained_var: 0.7720865607261658
          vf_loss: 0.03876659975697597
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,411,15429.2,411000,1.8191,9.91,-12.82,185.69




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-27_01-27-50
  done: false
  episode_len_mean: 186.75
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 1.8499000000000207
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 5
  episodes_total: 1357
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3901185287127361
          cur_lr: 5.000000000000001e-05
          entropy: 2.1193555858400135
          entropy_coeff: 0.009999999999999998
          kl: 0.012342602939949012
          policy_loss: 0.08139039344257779
          total_loss: 0.2800765381091171
          vf_explained_var: 0.42448797821998596
          vf_loss: 0.21506462684935992
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,412,15489.7,412000,1.8499,9.91,-12.82,186.75




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-27_01-31-39
  done: false
  episode_len_mean: 174.46
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 2.4296000000000193
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 12
  episodes_total: 1369
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3901185287127361
          cur_lr: 5.000000000000001e-05
          entropy: 2.0154703603850472
          entropy_coeff: 0.009999999999999998
          kl: 0.012472770200184148
          policy_loss: -0.16070012441939777
          total_loss: -0.06881548621588283
          vf_explained_var: 0.9791932702064514
          vf_loss: 0.10717348218378094
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,413,15719.3,413000,2.4296,9.91,-12.82,174.46




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-27_01-32-55
  done: false
  episode_len_mean: 176.14
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 2.404700000000019
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 5
  episodes_total: 1374
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3901185287127361
          cur_lr: 5.000000000000001e-05
          entropy: 2.192472963862949
          entropy_coeff: 0.009999999999999998
          kl: 0.006627470364554622
          policy_loss: -0.12215766691499286
          total_loss: -0.09394721852408515
          vf_explained_var: 0.6128861904144287
          vf_loss: 0.047549678726742664
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,414,15794.9,414000,2.4047,9.91,-12.82,176.14




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-27_01-33-32
  done: false
  episode_len_mean: 180.01
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 2.30590000000002
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 4
  episodes_total: 1378
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3901185287127361
          cur_lr: 5.000000000000001e-05
          entropy: 2.2809280051125422
          entropy_coeff: 0.009999999999999998
          kl: 0.007389523728443642
          policy_loss: 0.20422777679438392
          total_loss: 0.21258537537521785
          vf_explained_var: 0.7151815891265869
          vf_loss: 0.02828408487710274
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,415,15832.3,415000,2.3059,9.91,-12.82,180.01




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-27_01-35-07
  done: false
  episode_len_mean: 174.63
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 2.419800000000019
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 5
  episodes_total: 1383
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3901185287127361
          cur_lr: 5.000000000000001e-05
          entropy: 2.27608777946896
          entropy_coeff: 0.009999999999999998
          kl: 0.14759789637115264
          policy_loss: -0.007818671315908432
          total_loss: 0.9740049798455503
          vf_explained_var: 0.2102695256471634
          vf_loss: 0.9470038540454374
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,416,15927.4,416000,2.4198,9.91,-12.82,174.63




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-27_01-35-50
  done: false
  episode_len_mean: 182.15
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 2.26340000000002
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 4
  episodes_total: 1387
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5851777930691044
          cur_lr: 5.000000000000001e-05
          entropy: 2.2867787228690255
          entropy_coeff: 0.009999999999999998
          kl: 0.0060734522940135505
          policy_loss: -0.06168895663900508
          total_loss: -0.04122614974362983
          vf_explained_var: 0.6620432734489441
          vf_loss: 0.039776544293595686
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,417,15970,417000,2.2634,9.91,-12.82,182.15




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-27_01-36-49
  done: false
  episode_len_mean: 185.21
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 2.3024000000000204
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 4
  episodes_total: 1391
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5851777930691044
          cur_lr: 5.000000000000001e-05
          entropy: 2.2998632219102646
          entropy_coeff: 0.009999999999999998
          kl: 0.004656855268610806
          policy_loss: -0.09051550726095835
          total_loss: -0.08347375591595968
          vf_explained_var: 0.8625399470329285
          vf_loss: 0.027315292714370623
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,418,16028.9,418000,2.3024,9.91,-12.82,185.21




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-27_01-38-05
  done: false
  episode_len_mean: 177.23
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 2.4724000000000186
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 6
  episodes_total: 1397
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2925888965345522
          cur_lr: 5.000000000000001e-05
          entropy: 2.31797686152988
          entropy_coeff: 0.009999999999999998
          kl: 0.006543317595398151
          policy_loss: -0.03450890059272448
          total_loss: -0.023611592170264987
          vf_explained_var: 0.8596566319465637
          vf_loss: 0.03216257628777789
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,419,16104.9,419000,2.4724,9.91,-12.82,177.23




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-27_01-40-42
  done: false
  episode_len_mean: 173.29
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 2.681500000000018
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 9
  episodes_total: 1406
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2925888965345522
          cur_lr: 5.000000000000001e-05
          entropy: 2.128583331902822
          entropy_coeff: 0.009999999999999998
          kl: 0.0183516757668431
          policy_loss: -0.0633687083919843
          total_loss: 0.6236849822931819
          vf_explained_var: 0.786277711391449
          vf_loss: 0.7029700252744887
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,420,16261.9,420000,2.6815,9.91,-12.82,173.29




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-27_01-42-41
  done: false
  episode_len_mean: 167.31
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 2.701900000000017
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 8
  episodes_total: 1414
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2925888965345522
          cur_lr: 5.000000000000001e-05
          entropy: 2.097726457648807
          entropy_coeff: 0.009999999999999998
          kl: 0.012590677704129187
          policy_loss: -0.07973045988215341
          total_loss: 0.02264450180033843
          vf_explained_var: 0.7107157111167908
          vf_loss: 0.11966833203203148
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,421,16381.3,421000,2.7019,9.9,-12.82,167.31




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-27_01-46-07
  done: false
  episode_len_mean: 157.06
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 2.939300000000016
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 12
  episodes_total: 1426
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2925888965345522
          cur_lr: 5.000000000000001e-05
          entropy: 2.058180562655131
          entropy_coeff: 0.009999999999999998
          kl: 0.02172037465336968
          policy_loss: -0.0622211223675145
          total_loss: 0.29839444864127374
          vf_explained_var: 0.9392365217208862
          vf_loss: 0.3748422456232624
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,422,16587,422000,2.9393,9.9,-12.82,157.06


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-27_01-46-32
  done: false
  episode_len_mean: 156.57
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 2.9919000000000153
  episode_reward_min: -12.81999999999995
  episodes_this_iter: 2
  episodes_total: 1428
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 2.3674715095096164
          entropy_coeff: 0.009999999999999998
          kl: 0.01574001651385372
          policy_loss: 0.05145184422532718
          total_loss: 0.16409076232877043
          vf_explained_var: 0.5568590760231018
          vf_loss: 0.12940559834241866
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,423,16611.7,423000,2.9919,9.9,-12.82,156.57




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-27_01-48-16
  done: false
  episode_len_mean: 160.1
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.121800000000016
  episode_reward_min: -8.45999999999995
  episodes_this_iter: 7
  episodes_total: 1435
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 2.0867893603112964
          entropy_coeff: 0.009999999999999998
          kl: 0.008735983453057086
          policy_loss: 0.07995273802015516
          total_loss: 0.2587375890877512
          vf_explained_var: 0.8347175717353821
          vf_loss: 0.19581866822360705
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,424,16716,424000,3.1218,9.92,-8.46,160.1




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-27_01-50-15
  done: false
  episode_len_mean: 165.47
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.0173000000000165
  episode_reward_min: -8.45999999999995
  episodes_this_iter: 7
  episodes_total: 1442
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 2.128383329179552
          entropy_coeff: 0.009999999999999998
          kl: 0.011047367780591181
          policy_loss: -0.11801805843909581
          total_loss: 0.05336614896853765
          vf_explained_var: 0.9086204767227173
          vf_loss: 0.1878195310321947
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,425,16835.3,425000,3.0173,9.92,-8.46,165.47




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-27_01-50-55
  done: false
  episode_len_mean: 165.76
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.9770000000000163
  episode_reward_min: -8.45999999999995
  episodes_this_iter: 3
  episodes_total: 1445
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 2.368697905540466
          entropy_coeff: 0.009999999999999998
          kl: 0.008685667403337335
          policy_loss: 0.060054471467932066
          total_loss: 0.35845625292923716
          vf_explained_var: 0.4698949158191681
          vf_loss: 0.3182767681673997
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,426,16874.9,426000,2.977,9.92,-8.46,165.76


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-27_01-51-17
  done: false
  episode_len_mean: 170.61
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.8276000000000168
  episode_reward_min: -8.45999999999995
  episodes_this_iter: 2
  episodes_total: 1447
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4388833448018283
          cur_lr: 5.000000000000001e-05
          entropy: 2.3216725163989596
          entropy_coeff: 0.009999999999999998
          kl: 0.023707695710948447
          policy_loss: 0.0714919336967998
          total_loss: 0.06574209742248058
          vf_explained_var: 0.3260897099971771
          vf_loss: 0.007061979811017712
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,427,16896.6,427000,2.8276,9.92,-8.46,170.61




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-27_01-54-25
  done: false
  episode_len_mean: 155.94
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.298500000000016
  episode_reward_min: -8.429999999999936
  episodes_this_iter: 11
  episodes_total: 1458
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.8515131142404344
          entropy_coeff: 0.009999999999999998
          kl: 0.006272761617154031
          policy_loss: -0.19877119983235994
          total_loss: -0.16767888474795553
          vf_explained_var: 0.35050761699676514
          vf_loss: 0.045477929794126085
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,428,17084.5,428000,3.2985,9.92,-8.43,155.94




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-27_01-56-59
  done: false
  episode_len_mean: 162.96
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.1383000000000165
  episode_reward_min: -8.429999999999936
  episodes_this_iter: 10
  episodes_total: 1468
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.8224741677443186
          entropy_coeff: 0.009999999999999998
          kl: 0.0051506862315873255
          policy_loss: 0.12654623687267302
          total_loss: 0.1895653520193365
          vf_explained_var: 0.7480764389038086
          vf_loss: 0.07785303428665631
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,429,17238.6,429000,3.1383,9.92,-8.43,162.96




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-27_01-59-10
  done: false
  episode_len_mean: 154.54
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.313400000000016
  episode_reward_min: -8.429999999999936
  episodes_this_iter: 8
  episodes_total: 1476
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 2.1872860087288752
          entropy_coeff: 0.009999999999999998
          kl: 0.011012125102793075
          policy_loss: -0.14619440096947883
          total_loss: 0.024551109969615938
          vf_explained_var: 0.2358894944190979
          vf_loss: 0.18536881668907074
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,430,17369.6,430000,3.3134,9.92,-8.43,154.54




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-27_02-01-42
  done: false
  episode_len_mean: 149.57
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.415500000000016
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 9
  episodes_total: 1485
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 2.3081818342208864
          entropy_coeff: 0.009999999999999998
          kl: 0.008309865861796092
          policy_loss: -0.031706676052676304
          total_loss: 0.08312199094539716
          vf_explained_var: 0.8433505296707153
          vf_loss: 0.13243988814453284
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,431,17522.1,431000,3.4155,9.93,-10.8,149.57




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-27_02-02-22
  done: false
  episode_len_mean: 147.02
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.541500000000015
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 3
  episodes_total: 1488
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 2.371953927146064
          entropy_coeff: 0.009999999999999998
          kl: 0.009932166865106742
          policy_loss: 0.08698215786781575
          total_loss: 0.08034248567289776
          vf_explained_var: 0.6161197423934937
          vf_loss: 0.01054127214786907
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,432,17561.5,432000,3.5415,9.93,-10.8,147.02




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-27_02-03-16
  done: false
  episode_len_mean: 149.1
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.501300000000015
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 4
  episodes_total: 1492
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 2.3195699705017936
          entropy_coeff: 0.009999999999999998
          kl: 0.006047615774425235
          policy_loss: 0.07340347270170848
          total_loss: 0.06959106292989518
          vf_explained_var: 0.3293336033821106
          vf_loss: 0.01540199750258277
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,433,17615.6,433000,3.5013,9.93,-10.8,149.1


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-27_02-03-33
  done: false
  episode_len_mean: 154.41
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.407700000000016
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 2
  episodes_total: 1494
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 2.379159683651394
          entropy_coeff: 0.009999999999999998
          kl: 0.007356828617477598
          policy_loss: 0.07454404648807314
          total_loss: 0.0598634594016605
          vf_explained_var: 0.3330402970314026
          vf_loss: 0.004267826614280542
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,434,17632.4,434000,3.4077,9.93,-10.8,154.41




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-27_02-04-34
  done: false
  episode_len_mean: 153.57
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.5121000000000167
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 4
  episodes_total: 1498
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6583250172027423
          cur_lr: 5.000000000000001e-05
          entropy: 1.7612279958195156
          entropy_coeff: 0.009999999999999998
          kl: 0.025076383559163803
          policy_loss: 0.05177161759800381
          total_loss: 0.1146443138519923
          vf_explained_var: 0.9306673407554626
          vf_loss: 0.06397656387545997
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,435,17693.3,435000,3.5121,9.93,-10.8,153.57




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-27_02-05-46
  done: false
  episode_len_mean: 160.18
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.3717000000000166
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 5
  episodes_total: 1503
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9874875258041136
          cur_lr: 5.000000000000001e-05
          entropy: 2.1733842425876193
          entropy_coeff: 0.009999999999999998
          kl: 0.0031947595247285954
          policy_loss: -0.20555949782331784
          total_loss: -0.11848791564504306
          vf_explained_var: 0.6165948510169983
          vf_loss: 0.10565063814736075
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,436,17765.7,436000,3.3717,9.93,-10.8,160.18




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-27_02-06-54
  done: false
  episode_len_mean: 167.32
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.341200000000019
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 4
  episodes_total: 1507
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4937437629020568
          cur_lr: 5.000000000000001e-05
          entropy: 1.8855384051799775
          entropy_coeff: 0.009999999999999998
          kl: 0.005787916713567773
          policy_loss: 0.1015578826268514
          total_loss: 0.21659765425655578
          vf_explained_var: 0.36721593141555786
          vf_loss: 0.13103740658197138
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,437,17833.5,437000,3.3412,9.93,-10.8,167.32




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-27_02-07-47
  done: false
  episode_len_mean: 175.13
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.11250000000002
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 4
  episodes_total: 1511
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4937437629020568
          cur_lr: 5.000000000000001e-05
          entropy: 2.327386424276564
          entropy_coeff: 0.009999999999999998
          kl: 0.003180077226983771
          policy_loss: 0.07757712486717436
          total_loss: 0.06341987550258636
          vf_explained_var: 0.2841532528400421
          vf_loss: 0.0075464705833130415
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,438,17886.4,438000,3.1125,9.93,-10.8,175.13




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-27_02-11-05
  done: false
  episode_len_mean: 172.71
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.1692000000000196
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 10
  episodes_total: 1521
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2468718814510284
          cur_lr: 5.000000000000001e-05
          entropy: 1.242815265390608
          entropy_coeff: 0.009999999999999998
          kl: 0.010134422463741742
          policy_loss: 0.014152139094140795
          total_loss: 0.20516733212603463
          vf_explained_var: 0.7098106145858765
          vf_loss: 0.20094144112533993
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,439,18084.6,439000,3.1692,9.93,-10.8,172.71


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-27_02-11-23
  done: false
  episode_len_mean: 182.69
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.9686000000000217
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 2
  episodes_total: 1523
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2468718814510284
          cur_lr: 5.000000000000001e-05
          entropy: 2.2276484065585667
          entropy_coeff: 0.009999999999999998
          kl: 0.01344118073909846
          policy_loss: -0.12649673389063942
          total_loss: -0.13272925284173753
          vf_explained_var: 0.21357637643814087
          vf_loss: 0.01272571325664305
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,440,18103,440000,2.9686,9.93,-10.8,182.69




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-27_02-12-35
  done: false
  episode_len_mean: 181.51
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.118300000000021
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 5
  episodes_total: 1528
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2468718814510284
          cur_lr: 5.000000000000001e-05
          entropy: 2.2461704625023735
          entropy_coeff: 0.009999999999999998
          kl: 0.005195865957056937
          policy_loss: -0.07052758791380459
          total_loss: -0.07932798282967674
          vf_explained_var: 0.21456006169319153
          vf_loss: 0.012378599787431692
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,441,18174.7,441000,3.1183,9.93,-10.8,181.51




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-27_02-14-47
  done: false
  episode_len_mean: 181.31
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.159200000000021
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 7
  episodes_total: 1535
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2468718814510284
          cur_lr: 5.000000000000001e-05
          entropy: 2.2254898481898837
          entropy_coeff: 0.009999999999999998
          kl: 0.008023548428185596
          policy_loss: 0.07178510634435548
          total_loss: 0.05856211533149083
          vf_explained_var: 0.13089759647846222
          vf_loss: 0.007051118288008082
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,442,18306.9,442000,3.1592,9.93,-10.8,181.31




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-27_02-17-20
  done: false
  episode_len_mean: 174.15
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.2714000000000207
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 8
  episodes_total: 1543
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2468718814510284
          cur_lr: 5.000000000000001e-05
          entropy: 1.9435804857148065
          entropy_coeff: 0.009999999999999998
          kl: 0.059976534025718446
          policy_loss: 0.03268338640530904
          total_loss: 0.638958180281851
          vf_explained_var: -0.11536329239606857
          vf_loss: 0.6109040914375025
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,443,18459.7,443000,3.2714,9.93,-10.8,174.15




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-27_02-17-56
  done: false
  episode_len_mean: 175.56
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.2949000000000206
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 3
  episodes_total: 1546
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 2.282734309302436
          entropy_coeff: 0.009999999999999998
          kl: 0.007070866555953851
          policy_loss: 0.026086137500695057
          total_loss: 0.015456536619199646
          vf_explained_var: 0.13088074326515198
          vf_loss: 0.009579345486256191
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,444,18495.4,444000,3.2949,9.93,-10.8,175.56




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-27_02-19-09
  done: false
  episode_len_mean: 177.53
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.2251000000000216
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 5
  episodes_total: 1551
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 2.349720679389106
          entropy_coeff: 0.009999999999999998
          kl: 0.007338924491471567
          policy_loss: 0.14784325435757636
          total_loss: 0.14298796359863547
          vf_explained_var: -0.06202199310064316
          vf_loss: 0.015924255046734795
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,445,18568,445000,3.2251,9.93,-10.8,177.53




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-27_02-20-21
  done: false
  episode_len_mean: 180.13
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.0989000000000226
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 4
  episodes_total: 1555
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 2.1594691912333173
          entropy_coeff: 0.009999999999999998
          kl: 0.007755523520375974
          policy_loss: -0.06210206366247601
          total_loss: -0.05511445270644294
          vf_explained_var: 0.3849777579307556
          vf_loss: 0.025710366958648794
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,446,18640.9,446000,3.0989,9.93,-10.8,180.13




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-27_02-22-15
  done: false
  episode_len_mean: 178.18
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.0921000000000225
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 7
  episodes_total: 1562
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 2.139110839366913
          entropy_coeff: 0.009999999999999998
          kl: 0.017743483729543985
          policy_loss: 0.052757944415013
          total_loss: 0.3728249801529778
          vf_explained_var: 0.7596341371536255
          vf_loss: 0.33488758967982396
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,447,18754.4,447000,3.0921,9.93,-10.8,178.18




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-27_02-23-12
  done: false
  episode_len_mean: 191.2
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.622400000000024
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 5
  episodes_total: 1567
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 2.3710160599814523
          entropy_coeff: 0.009999999999999998
          kl: 0.018275695711382545
          policy_loss: 0.05938928789562649
          total_loss: 0.3548307473253873
          vf_explained_var: 0.6474508047103882
          vf_loss: 0.3123839818769031
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,448,18811.9,448000,2.6224,9.93,-10.8,191.2




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-27_02-24-28
  done: false
  episode_len_mean: 195.15
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.5526000000000253
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 5
  episodes_total: 1572
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 2.3365169525146485
          entropy_coeff: 0.009999999999999998
          kl: 0.013519628757385214
          policy_loss: -0.13796610310673713
          total_loss: 0.10961469275255999
          vf_explained_var: 0.6815803050994873
          vf_loss: 0.2659395413266288
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,449,18887.5,449000,2.5526,9.93,-10.8,195.15




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-27_02-25-06
  done: false
  episode_len_mean: 204.17
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.345500000000026
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 3
  episodes_total: 1575
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 2.051079041428036
          entropy_coeff: 0.009999999999999998
          kl: 0.015122511374333828
          policy_loss: -0.05009239522947206
          total_loss: 0.05625306045015653
          vf_explained_var: 0.1963443160057068
          vf_loss: 0.12125625784715845
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,450,18925,450000,2.3455,9.93,-10.8,204.17


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-27_02-25-24
  done: false
  episode_len_mean: 209.82
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.1984000000000266
  episode_reward_min: -10.799999999999923
  episodes_this_iter: 2
  episodes_total: 1577
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 2.2214003801345825
          entropy_coeff: 0.009999999999999998
          kl: 0.012219615764299648
          policy_loss: -0.13801066502928733
          total_loss: -0.14117671350638072
          vf_explained_var: 0.7978315353393555
          vf_loss: 0.01452293750933475
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,451,18943.4,451000,2.1984,9.93,-10.8,209.82




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-27_02-28-03
  done: false
  episode_len_mean: 205.44
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.5111000000000256
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 7
  episodes_total: 1584
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3703078221765425
          cur_lr: 5.000000000000001e-05
          entropy: 1.04681822028425
          entropy_coeff: 0.009999999999999998
          kl: 0.04796555637610361
          policy_loss: -0.06351417342407836
          total_loss: 1.6599911873125368
          vf_explained_var: 0.8128210306167603
          vf_loss: 1.7162115005569325
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,452,19102,452000,2.5111,9.93,-8.89,205.44




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-27_02-31-33
  done: false
  episode_len_mean: 185.36
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.083900000000022
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 12
  episodes_total: 1596
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 1.812482804722256
          entropy_coeff: 0.009999999999999998
          kl: 0.0059030257116620055
          policy_loss: 0.06984921395778657
          total_loss: 0.36924681067466736
          vf_explained_var: 0.39440277218818665
          vf_loss: 0.31424352372220404
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,453,19312.8,453000,3.0839,9.92,-8.89,185.36




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-10-27_02-32-49
  done: false
  episode_len_mean: 183.19
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.0273000000000225
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 5
  episodes_total: 1601
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 2.079849991533491
          entropy_coeff: 0.009999999999999998
          kl: 0.00959423360561718
          policy_loss: -0.02292454954650667
          total_loss: 0.10108068734407424
          vf_explained_var: 0.7953027486801147
          vf_loss: 0.13947451198990973
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,454,19388.7,454000,3.0273,9.92,-8.89,183.19




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-10-27_02-33-49
  done: false
  episode_len_mean: 181.64
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.8850000000000215
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 4
  episodes_total: 1605
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 2.1558874050776162
          entropy_coeff: 0.009999999999999998
          kl: 0.005763329842189262
          policy_loss: 0.008811322185728285
          total_loss: 0.17966914230750666
          vf_explained_var: 0.657089352607727
          vf_loss: 0.1892153852722711
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,455,19448.5,455000,2.885,9.92,-8.89,181.64




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-10-27_02-35-08
  done: false
  episode_len_mean: 180.12
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.9131000000000222
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 5
  episodes_total: 1610
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 2.2372551414701674
          entropy_coeff: 0.009999999999999998
          kl: 0.00728877395847718
          policy_loss: 0.11954323757025931
          total_loss: 0.19985521452294455
          vf_explained_var: 0.13448046147823334
          vf_loss: 0.09863589155591196
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,456,19527.3,456000,2.9131,9.92,-8.89,180.12




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-10-27_02-35-44
  done: false
  episode_len_mean: 185.22
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.6989000000000214
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 3
  episodes_total: 1613
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 2.2952095058229234
          entropy_coeff: 0.009999999999999998
          kl: 0.01027260902640506
          policy_loss: 0.12783500519063737
          total_loss: 0.12236703948842155
          vf_explained_var: 0.3749372661113739
          vf_loss: 0.01177809130296939
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 457000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,457,19563.1,457000,2.6989,9.92,-8.89,185.22




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-10-27_02-37-39
  done: false
  episode_len_mean: 190.11
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.7319000000000226
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 6
  episodes_total: 1619
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 2.021596191989051
          entropy_coeff: 0.009999999999999998
          kl: 0.007504553333608064
          policy_loss: -0.09979653126663632
          total_loss: 0.029459385325511296
          vf_explained_var: 0.7495729923248291
          vf_loss: 0.14530338716641483
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,458,19678.1,458000,2.7319,9.92,-8.89,190.11




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-10-27_02-39-50
  done: false
  episode_len_mean: 180.36
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.9819000000000218
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 8
  episodes_total: 1627
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5554617332648137
          cur_lr: 5.000000000000001e-05
          entropy: 1.9774468885527716
          entropy_coeff: 0.009999999999999998
          kl: 0.0031757795538509105
          policy_loss: -0.07900830027129915
          total_loss: -0.039632601849734786
          vf_explained_var: 0.3263607323169708
          vf_loss: 0.05738614196371701
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,459,19809.6,459000,2.9819,9.92,-8.89,180.36




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-10-27_02-41-26
  done: false
  episode_len_mean: 179.76
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.8309000000000215
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 6
  episodes_total: 1633
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27773086663240687
          cur_lr: 5.000000000000001e-05
          entropy: 2.1610175172487893
          entropy_coeff: 0.009999999999999998
          kl: 0.008647557008657753
          policy_loss: -0.05241694665617413
          total_loss: -0.008912036278181606
          vf_explained_var: 0.47685641050338745
          vf_loss: 0.0627133920426584
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,460,19904.7,460000,2.8309,9.92,-8.89,179.76




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-10-27_02-43-04
  done: false
  episode_len_mean: 182.06
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.864300000000022
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 6
  episodes_total: 1639
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27773086663240687
          cur_lr: 5.000000000000001e-05
          entropy: 1.8756831407546997
          entropy_coeff: 0.009999999999999998
          kl: 0.025933199543494368
          policy_loss: 0.09120459796653854
          total_loss: 0.5701335498442253
          vf_explained_var: 0.5406161546707153
          vf_loss: 0.49048332849310505
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,461,20003.4,461000,2.8643,9.92,-8.89,182.06




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-10-27_02-43-40
  done: false
  episode_len_mean: 191.63
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.6881000000000226
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 3
  episodes_total: 1642
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.2738029268052844
          entropy_coeff: 0.009999999999999998
          kl: 0.007444177087427365
          policy_loss: -0.006080290757947498
          total_loss: 0.019508670642971994
          vf_explained_var: 0.17048342525959015
          vf_loss: 0.04522577248668919
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 462000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,462,20038.8,462000,2.6881,9.92,-8.89,191.63




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-10-27_02-44-14
  done: false
  episode_len_mean: 191.49
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.6895000000000233
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 3
  episodes_total: 1645
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.299839798609416
          entropy_coeff: 0.009999999999999998
          kl: 0.006546454785647171
          policy_loss: -0.022868880753715834
          total_loss: -0.02813036007185777
          vf_explained_var: 0.16051267087459564
          vf_loss: 0.015009692105619858
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 463000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,463,20073.2,463000,2.6895,9.92,-8.89,191.49




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-10-27_02-45-48
  done: false
  episode_len_mean: 186.29
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.782600000000022
  episode_reward_min: -8.88999999999992
  episodes_this_iter: 5
  episodes_total: 1650
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.2459756043222217
          entropy_coeff: 0.009999999999999998
          kl: 0.009231999456086982
          policy_loss: -0.03962440289970901
          total_loss: 0.011877963774734074
          vf_explained_var: 0.5591550469398499
          vf_loss: 0.07011610775565108
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 464000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,464,20167.1,464000,2.7826,9.92,-8.89,186.29




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-10-27_02-50-33
  done: false
  episode_len_mean: 169.84
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.337600000000019
  episode_reward_min: -7.48999999999992
  episodes_this_iter: 16
  episodes_total: 1666
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.6922426210509407
          entropy_coeff: 0.009999999999999998
          kl: 0.011908337727185492
          policy_loss: -0.0259988395911124
          total_loss: 0.11637390334573057
          vf_explained_var: 0.5792280435562134
          vf_loss: 0.15433420462326872
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,465,20451.6,465000,3.3376,9.92,-7.49,169.84


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-10-27_02-50-48
  done: false
  episode_len_mean: 175.74
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.1781000000000206
  episode_reward_min: -7.48999999999992
  episodes_this_iter: 1
  episodes_total: 1667
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.1460189210044014
          entropy_coeff: 0.009999999999999998
          kl: 0.006204514965048618
          policy_loss: -0.1943187742597527
          total_loss: -0.20066946008139186
          vf_explained_var: 0.3528487980365753
          vf_loss: 0.01252472306498223
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 466000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,466,20467.4,466000,3.1781,9.92,-7.49,175.74




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-10-27_02-52-21
  done: false
  episode_len_mean: 174.4
  episode_media: {}
  episode_reward_max: 9.920000000000002
  episode_reward_mean: 3.3419000000000203
  episode_reward_min: -7.019999999999933
  episodes_this_iter: 6
  episodes_total: 1673
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.9908324016465082
          entropy_coeff: 0.009999999999999998
          kl: 0.009245529378814411
          policy_loss: 0.0495823949161503
          total_loss: 0.19075216568178602
          vf_explained_var: 0.35711538791656494
          vf_loss: 0.15722644040361047
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,467,20559.5,467000,3.3419,9.92,-7.02,174.4




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-10-27_02-53-32
  done: false
  episode_len_mean: 167.63
  episode_media: {}
  episode_reward_max: 9.920000000000002
  episode_reward_mean: 3.4210000000000194
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 5
  episodes_total: 1678
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.04823929866155
          entropy_coeff: 0.009999999999999998
          kl: 0.007890739229262264
          policy_loss: 0.021451398564709557
          total_loss: 0.08082829399241341
          vf_explained_var: 0.2040158212184906
          vf_loss: 0.07657203628008978
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 468000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,468,20631.5,468000,3.421,9.92,-7.29,167.63




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-10-27_02-55-22
  done: false
  episode_len_mean: 169.91
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.29850000000002
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 7
  episodes_total: 1685
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.0066826343536377
          entropy_coeff: 0.009999999999999998
          kl: 0.006673063047949116
          policy_loss: 0.026495965984132556
          total_loss: 0.04836033375726806
          vf_explained_var: 0.5622775554656982
          vf_loss: 0.03915121938205428
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,469,20741.3,469000,3.2985,9.93,-7.29,169.91




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-10-27_02-58-12
  done: false
  episode_len_mean: 169.79
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.1834000000000207
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 10
  episodes_total: 1695
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.7218967888090346
          entropy_coeff: 0.009999999999999998
          kl: 0.009183054663386928
          policy_loss: -0.12459669725762473
          total_loss: 0.07772890105843544
          vf_explained_var: 0.7313281297683716
          vf_loss: 0.2157189415146907
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,470,20911.1,470000,3.1834,9.93,-7.29,169.79


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-10-27_02-58-29
  done: false
  episode_len_mean: 176.16
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.019200000000021
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 1
  episodes_total: 1696
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.963492246468862
          entropy_coeff: 0.009999999999999998
          kl: 0.008147081996546662
          policy_loss: -0.09482406835175222
          total_loss: -0.09779865226397912
          vf_explained_var: 0.45030370354652405
          vf_loss: 0.013266292977560726
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 471000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,471,20927.8,471000,3.0192,9.93,-7.29,176.16




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-10-27_03-00-58
  done: false
  episode_len_mean: 168.07
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.344400000000021
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 9
  episodes_total: 1705
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.8843279745843675
          entropy_coeff: 0.009999999999999998
          kl: 0.006786061030916388
          policy_loss: -0.01962637934419844
          total_loss: -0.015058150225215488
          vf_explained_var: 0.2776910066604614
          vf_loss: 0.020584461038621764
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 472000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,472,21076.8,472000,3.3444,9.93,-7.29,168.07




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-10-27_03-02-11
  done: false
  episode_len_mean: 169.23
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.2797000000000214
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 5
  episodes_total: 1710
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.995599631468455
          entropy_coeff: 0.009999999999999998
          kl: 0.005830707206477253
          policy_loss: 0.023592269834544925
          total_loss: 0.015100335329771042
          vf_explained_var: 0.31924375891685486
          vf_loss: 0.009035010591873692
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,473,21149.5,473000,3.2797,9.93,-7.29,169.23




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-10-27_03-03-42
  done: false
  episode_len_mean: 163.89
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.465100000000021
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 6
  episodes_total: 1716
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.220471265580919
          entropy_coeff: 0.009999999999999998
          kl: 0.008261865346741497
          policy_loss: 0.028373317130737836
          total_loss: 0.09453373476862907
          vf_explained_var: 0.4011736214160919
          vf_loss: 0.08492326804747184
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,474,21241,474000,3.4651,9.93,-7.29,163.89




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-10-27_03-05-26
  done: false
  episode_len_mean: 167.2
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.2600000000000207
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 6
  episodes_total: 1722
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.1308074341879952
          entropy_coeff: 0.009999999999999998
          kl: 0.01083107919460752
          policy_loss: 0.020165752412544355
          total_loss: 0.013603176217940119
          vf_explained_var: 0.7255685329437256
          vf_loss: 0.010233311897738732
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,475,21344.4,475000,3.26,9.93,-7.29,167.2




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-10-27_03-06-40
  done: false
  episode_len_mean: 167.69
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.2880000000000207
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 4
  episodes_total: 1726
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.9587669875886706
          entropy_coeff: 0.009999999999999998
          kl: 0.00941781705177587
          policy_loss: -0.005365922157135275
          total_loss: 0.42731706059227387
          vf_explained_var: 0.7103200554847717
          vf_loss: 0.4483472234905801
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,476,21419.2,476000,3.288,9.93,-7.29,167.69




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-10-27_03-07-19
  done: false
  episode_len_mean: 175.75
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.0985000000000222
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 4
  episodes_total: 1730
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.2611417134602863
          entropy_coeff: 0.009999999999999998
          kl: 0.007413609137027964
          policy_loss: -0.17789707344232333
          total_loss: -0.14895011726766824
          vf_explained_var: 0.6309677958488464
          vf_loss: 0.048469891419841184
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 477000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,477,21457.7,477000,3.0985,9.93,-7.29,175.75




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-10-27_03-08-15
  done: false
  episode_len_mean: 175.62
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.216600000000022
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 3
  episodes_total: 1733
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.1063074853685166
          entropy_coeff: 0.009999999999999998
          kl: 0.016476559942691496
          policy_loss: -0.028172478328148523
          total_loss: -0.03444847174816661
          vf_explained_var: 0.498823881149292
          vf_loss: 0.00792300302742256
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 478000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,478,21513.3,478000,3.2166,9.93,-7.29,175.62




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-10-27_03-09-50
  done: false
  episode_len_mean: 177.2
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.2239000000000213
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 6
  episodes_total: 1739
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.0334161109394495
          entropy_coeff: 0.009999999999999998
          kl: 0.00934707601058542
          policy_loss: -0.12083051626880964
          total_loss: -0.1262454212539726
          vf_explained_var: 0.92462557554245
          vf_loss: 0.011025300777206818
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,479,21608.7,479000,3.2239,9.93,-7.29,177.2




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-10-27_03-11-07
  done: false
  episode_len_mean: 169.72
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.340700000000021
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 6
  episodes_total: 1745
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.8858338899082607
          entropy_coeff: 0.009999999999999998
          kl: 0.011393362164163425
          policy_loss: 0.01842140249080128
          total_loss: 0.14225820990072358
          vf_explained_var: 0.9455795884132385
          vf_loss: 0.13794871610071924
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 480000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,480,21686.1,480000,3.3407,9.93,-7.29,169.72


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-10-27_03-11-26
  done: false
  episode_len_mean: 174.39
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.243600000000021
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 2
  episodes_total: 1747
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 2.0823143985536365
          entropy_coeff: 0.009999999999999998
          kl: 0.014677927321314302
          policy_loss: -0.14817518418033918
          total_loss: -0.15838756466077433
          vf_explained_var: 0.8806220889091492
          vf_loss: 0.004495992471412238
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 481000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,481,21704.4,481000,3.2436,9.93,-7.29,174.39




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-10-27_03-13-53
  done: false
  episode_len_mean: 176.86
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.263700000000021
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 9
  episodes_total: 1756
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.800100611315833
          entropy_coeff: 0.009999999999999998
          kl: 0.005663012979263549
          policy_loss: -0.008562825868527095
          total_loss: 0.01693244915869501
          vf_explained_var: 0.5283498167991638
          vf_loss: 0.04113708978273078
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 482000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,482,21851.3,482000,3.2637,9.93,-7.29,176.86




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-10-27_03-15-45
  done: false
  episode_len_mean: 182.42
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.0492000000000217
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 6
  episodes_total: 1762
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.9471359305911593
          entropy_coeff: 0.009999999999999998
          kl: 0.009544752979402521
          policy_loss: 0.0038329145974583096
          total_loss: 0.022628837327162424
          vf_explained_var: 0.8161336183547974
          vf_loss: 0.034290966474347644
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 483000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,483,21964.1,483000,3.0492,9.93,-7.29,182.42




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-10-27_03-17-57
  done: false
  episode_len_mean: 176.85
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.1771000000000216
  episode_reward_min: -7.289999999999898
  episodes_this_iter: 8
  episodes_total: 1770
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.881375081009335
          entropy_coeff: 0.009999999999999998
          kl: 0.013984692419545672
          policy_loss: 0.026970361669858296
          total_loss: 0.2560645264055994
          vf_explained_var: 0.870242714881897
          vf_loss: 0.24208195022462556
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,484,22095.6,484000,3.1771,9.93,-7.29,176.85




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-10-27_03-20-07
  done: false
  episode_len_mean: 170.78
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.40540000000002
  episode_reward_min: -6.639999999999933
  episodes_this_iter: 8
  episodes_total: 1778
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4165962999486102
          cur_lr: 5.000000000000001e-05
          entropy: 1.8584838933414882
          entropy_coeff: 0.009999999999999998
          kl: 0.032953665054888205
          policy_loss: -0.010336771607398987
          total_loss: 0.10069065772824817
          vf_explained_var: 0.45390501618385315
          vf_loss: 0.1158838922649415
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,485,22225.2,485000,3.4054,9.93,-6.64,170.78




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-10-27_03-21-02
  done: false
  episode_len_mean: 170.33
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.35880000000002
  episode_reward_min: -6.639999999999933
  episodes_this_iter: 4
  episodes_total: 1782
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 2.061407526334127
          entropy_coeff: 0.009999999999999998
          kl: 0.006462619566007863
          policy_loss: 0.04389541405770514
          total_loss: 0.03632490730120076
          vf_explained_var: 0.5351939797401428
          vf_loss: 0.009005111572332681
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,486,22280.4,486000,3.3588,9.93,-6.64,170.33




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-10-27_03-22-13
  done: false
  episode_len_mean: 176.4
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.237600000000021
  episode_reward_min: -6.639999999999933
  episodes_this_iter: 4
  episodes_total: 1786
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.9333956638971965
          entropy_coeff: 0.009999999999999998
          kl: 0.01330964279836433
          policy_loss: -0.07467512442833847
          total_loss: -0.006995566230681208
          vf_explained_var: 0.33710402250289917
          vf_loss: 0.07869638903761775
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,487,22351.2,487000,3.2376,9.93,-6.64,176.4




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-10-27_03-24-47
  done: false
  episode_len_mean: 177.18
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.1664000000000208
  episode_reward_min: -6.639999999999933
  episodes_this_iter: 9
  episodes_total: 1795
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.9191936042573716
          entropy_coeff: 0.009999999999999998
          kl: 0.007132703388079046
          policy_loss: -0.057470847583479354
          total_loss: -0.044431611233287385
          vf_explained_var: 0.017184315249323845
          vf_loss: 0.027773990109562875
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,488,22505.8,488000,3.1664,9.93,-6.64,177.18




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-10-27_03-25-41
  done: false
  episode_len_mean: 175.57
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.182100000000021
  episode_reward_min: -6.639999999999933
  episodes_this_iter: 4
  episodes_total: 1799
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 2.0141228397687274
          entropy_coeff: 0.009999999999999998
          kl: 0.008082689071299281
          policy_loss: -0.05493536964058876
          total_loss: -0.050646241505940756
          vf_explained_var: 0.31331154704093933
          vf_loss: 0.019379528007832252
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,489,22559.5,489000,3.1821,9.93,-6.64,175.57




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-10-27_03-31-01
  done: false
  episode_len_mean: 156.11
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.689800000000018
  episode_reward_min: -6.629999999999946
  episodes_this_iter: 18
  episodes_total: 1817
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.8857298758294847
          entropy_coeff: 0.009999999999999998
          kl: 0.005643247886751664
          policy_loss: 0.0037678316235542296
          total_loss: 0.16226311417089567
          vf_explained_var: 0.8439285159111023
          vf_loss: 0.17382614455095285
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,490,22879.6,490000,3.6898,9.93,-6.63,156.11




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-10-27_03-33-13
  done: false
  episode_len_mean: 155.76
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.706600000000017
  episode_reward_min: -6.629999999999946
  episodes_this_iter: 8
  episodes_total: 1825
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.9362204750378926
          entropy_coeff: 0.009999999999999998
          kl: 0.010127730604146378
          policy_loss: -0.0019719324592087006
          total_loss: 0.2042387314968639
          vf_explained_var: 0.8680158853530884
          vf_loss: 0.21924410409087108
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,491,23011.5,491000,3.7066,9.93,-6.63,155.76




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-10-27_03-36-46
  done: false
  episode_len_mean: 133.43
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 4.165800000000015
  episode_reward_min: -6.199999999999912
  episodes_this_iter: 12
  episodes_total: 1837
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6248944499229154
          cur_lr: 5.000000000000001e-05
          entropy: 1.630443569024404
          entropy_coeff: 0.009999999999999998
          kl: 0.06836276961224154
          policy_loss: 0.19688164384828674
          total_loss: 0.8333355893691381
          vf_explained_var: 0.7007209658622742
          vf_loss: 0.6100388644470109
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,492,23224.3,492000,4.1658,9.93,-6.2,133.43




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-10-27_03-37-45
  done: false
  episode_len_mean: 137.01
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 4.019600000000016
  episode_reward_min: -6.199999999999912
  episodes_this_iter: 4
  episodes_total: 1841
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 2.030192524856991
          entropy_coeff: 0.009999999999999998
          kl: 0.0065197243018510894
          policy_loss: -0.09686227807154259
          total_loss: -0.06354361319293579
          vf_explained_var: 0.800511360168457
          vf_loss: 0.04750937817799342
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,493,23283.8,493000,4.0196,9.93,-6.2,137.01




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-10-27_03-39-19
  done: false
  episode_len_mean: 128.41
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 4.268900000000013
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 6
  episodes_total: 1847
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.8394667148590087
          entropy_coeff: 0.009999999999999998
          kl: 0.011289196976057427
          policy_loss: -0.1221737874050935
          total_loss: -0.016588008569346533
          vf_explained_var: 0.6871225833892822
          vf_loss: 0.11339861432918244
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,494,23377.5,494000,4.2689,9.93,-6.48,128.41




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-10-27_03-41-48
  done: false
  episode_len_mean: 129.42
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 4.2442000000000135
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 9
  episodes_total: 1856
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.9188488827811347
          entropy_coeff: 0.009999999999999998
          kl: 0.0058347956656967645
          policy_loss: 0.10118912388053206
          total_loss: 0.13003848495168818
          vf_explained_var: 0.049652792513370514
          vf_loss: 0.04256865239650425
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 4950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,495,23526,495000,4.2442,9.94,-6.48,129.42




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-10-27_03-46-31
  done: false
  episode_len_mean: 115.34
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 4.614100000000012
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 16
  episodes_total: 1872
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9373416748843731
          cur_lr: 5.000000000000001e-05
          entropy: 1.8371709399753147
          entropy_coeff: 0.009999999999999998
          kl: 0.004347161201308304
          policy_loss: 0.07194399784008662
          total_loss: 0.09943683747616079
          vf_explained_var: 0.47070395946502686
          vf_loss: 0.041789774084463716
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 49600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,496,23809.7,496000,4.6141,9.94,-6.48,115.34




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-10-27_03-47-27
  done: false
  episode_len_mean: 123.93
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 4.318700000000013
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 4
  episodes_total: 1876
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46867083744218657
          cur_lr: 5.000000000000001e-05
          entropy: 1.9872773951954312
          entropy_coeff: 0.009999999999999998
          kl: 0.01376284269071826
          policy_loss: -0.028065216872427197
          total_loss: -0.026362353728877172
          vf_explained_var: 0.6303767561912537
          vf_loss: 0.015125397846309676
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,497,23865.5,497000,4.3187,9.94,-6.48,123.93




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-10-27_03-49-02
  done: false
  episode_len_mean: 117.61
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 4.482200000000013
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 6
  episodes_total: 1882
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46867083744218657
          cur_lr: 5.000000000000001e-05
          entropy: 1.8504459500312804
          entropy_coeff: 0.009999999999999998
          kl: 0.004955082338299436
          policy_loss: -0.18395198434591292
          total_loss: -0.19005920572413337
          vf_explained_var: 0.905030369758606
          vf_loss: 0.010074938523272674
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 49800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,498,23960.6,498000,4.4822,9.94,-6.48,117.61




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-10-27_03-49-42
  done: false
  episode_len_mean: 119.91
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 4.412800000000011
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 3
  episodes_total: 1885
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.9178258763419258
          entropy_coeff: 0.009999999999999998
          kl: 0.01095748477927917
          policy_loss: -0.02079889507343372
          total_loss: 0.03864221111353901
          vf_explained_var: 0.4439784288406372
          vf_loss: 0.076051640117334
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,499,24000.2,499000,4.4128,9.94,-6.48,119.91




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-10-27_03-51-37
  done: false
  episode_len_mean: 124.16
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 4.410200000000013
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 7
  episodes_total: 1892
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.0903382513258193
          entropy_coeff: 0.009999999999999998
          kl: 0.008206505789039416
          policy_loss: -0.07254787741435899
          total_loss: -0.05830367969142066
          vf_explained_var: 0.6636908054351807
          vf_loss: 0.033224504885988104
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 5000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,500,24115.7,500000,4.4102,9.94,-6.48,124.16




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-10-27_03-52-56
  done: false
  episode_len_mean: 126.34
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 4.351000000000012
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 5
  episodes_total: 1897
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.0631297085020277
          entropy_coeff: 0.009999999999999998
          kl: 0.013214254768244239
          policy_loss: -0.05110249833928214
          total_loss: 0.08482721174756686
          vf_explained_var: 0.044721826910972595
          vf_loss: 0.15346443735761567
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 5010

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,501,24194,501000,4.351,9.94,-6.48,126.34




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-10-27_03-53-55
  done: false
  episode_len_mean: 123.58
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 4.388700000000013
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 4
  episodes_total: 1901
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.0386825932396784
          entropy_coeff: 0.009999999999999998
          kl: 0.006158267395478189
          policy_loss: -0.06335948237942325
          total_loss: -0.06393844092057811
          vf_explained_var: 0.24595235288143158
          vf_loss: 0.018364769031500652
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,502,24253,502000,4.3887,9.94,-6.48,123.58




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-10-27_03-55-51
  done: false
  episode_len_mean: 134.75
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 4.040500000000014
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 8
  episodes_total: 1909
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.8308530264430576
          entropy_coeff: 0.009999999999999998
          kl: 0.016129889024457632
          policy_loss: -0.10998514158030351
          total_loss: -0.06841217105587323
          vf_explained_var: 0.7315248847007751
          vf_loss: 0.056101693916651936
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 5030

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,503,24368.8,503000,4.0405,9.94,-6.48,134.75




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-10-27_03-56-35
  done: false
  episode_len_mean: 139.33
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.8629000000000144
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 4
  episodes_total: 1913
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.7226097067197164
          entropy_coeff: 0.009999999999999998
          kl: 0.016148218236936154
          policy_loss: 0.044485946744680406
          total_loss: 0.21865740211473572
          vf_explained_var: 0.6650314927101135
          vf_loss: 0.18761344804531999
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 50400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,504,24413.2,504000,3.8629,9.94,-6.48,139.33




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-10-27_03-57-55
  done: false
  episode_len_mean: 146.78
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.7306000000000155
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 5
  episodes_total: 1918
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.8532409773932563
          entropy_coeff: 0.009999999999999998
          kl: 0.009304266902350219
          policy_loss: -0.014569288078281615
          total_loss: 0.006047972043355306
          vf_explained_var: 0.7250693440437317
          vf_loss: 0.036969347911265986
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 50

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,505,24492.8,505000,3.7306,9.94,-6.48,146.78




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-10-27_03-58-53
  done: false
  episode_len_mean: 154.07
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.5048000000000155
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 5
  episodes_total: 1923
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.7304859823650784
          entropy_coeff: 0.009999999999999998
          kl: 0.008503102749936767
          policy_loss: -0.04455023668706417
          total_loss: -0.0439220421637098
          vf_explained_var: 0.8572321534156799
          vf_loss: 0.01594047813107156
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 50600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,506,24551.4,506000,3.5048,9.94,-6.48,154.07




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-10-27_04-00-48
  done: false
  episode_len_mean: 155.05
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.623100000000017
  episode_reward_min: -6.479999999999947
  episodes_this_iter: 7
  episodes_total: 1930
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.9211722983254327
          entropy_coeff: 0.009999999999999998
          kl: 0.014018040683290461
          policy_loss: 0.07322831145591206
          total_loss: 0.08776650329430898
          vf_explained_var: 0.7767441868782043
          vf_loss: 0.03046499252733257
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,507,24666.3,507000,3.6231,9.94,-6.48,155.05




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-10-27_04-01-50
  done: false
  episode_len_mean: 159.56
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.396100000000017
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 4
  episodes_total: 1934
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.7961143175760905
          entropy_coeff: 0.009999999999999998
          kl: 0.0115375783685269
          policy_loss: -0.0011698911328696542
          total_loss: 0.08558185001214345
          vf_explained_var: 0.3013874590396881
          vf_loss: 0.10200921764804258
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,508,24727.7,508000,3.3961,9.94,-7.42,159.56




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-10-27_04-03-23
  done: false
  episode_len_mean: 160.18
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.3864000000000174
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 6
  episodes_total: 1940
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.8876665274302165
          entropy_coeff: 0.009999999999999998
          kl: 0.018012005214566675
          policy_loss: -0.029409504267904493
          total_loss: 0.126394767810901
          vf_explained_var: 0.8013403415679932
          vf_loss: 0.17046008439113697
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,509,24821.2,509000,3.3864,9.94,-7.42,160.18




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-10-27_04-04-59
  done: false
  episode_len_mean: 163.32
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.3164000000000184
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 6
  episodes_total: 1946
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.734250557422638
          entropy_coeff: 0.009999999999999998
          kl: 0.01345690519011041
          policy_loss: -0.05459748374091254
          total_loss: -0.0005368390017085605
          vf_explained_var: 0.6402098536491394
          vf_loss: 0.06824971815286618
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 51000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,510,24916.9,510000,3.3164,9.94,-7.42,163.32




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-10-27_04-06-16
  done: false
  episode_len_mean: 157.92
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.475800000000017
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 5
  episodes_total: 1951
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.790405445628696
          entropy_coeff: 0.009999999999999998
          kl: 0.010356828821405392
          policy_loss: -0.041767064999375075
          total_loss: 0.0006847299635410308
          vf_explained_var: 0.8813321590423584
          vf_loss: 0.05792887955904007
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 51100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,511,24994.1,511000,3.4758,9.94,-7.42,157.92




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-10-27_04-07-52
  done: false
  episode_len_mean: 166.03
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.2347000000000175
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 7
  episodes_total: 1958
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.6709901319609748
          entropy_coeff: 0.009999999999999998
          kl: 0.009785129161394455
          policy_loss: -0.008966950409942203
          total_loss: -0.0065898785160647495
          vf_explained_var: 0.7510635256767273
          vf_loss: 0.016793970867810357
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,512,25089.6,512000,3.2347,9.93,-7.42,166.03




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-10-27_04-09-25
  done: false
  episode_len_mean: 174.84
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.9963000000000193
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 6
  episodes_total: 1964
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.6558560623062981
          entropy_coeff: 0.009999999999999998
          kl: 0.004651476424901683
          policy_loss: -0.110961188789871
          total_loss: -0.11731641391913096
          vf_explained_var: 0.11152383685112
          vf_loss: 0.00911332953773025
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,513,25183.4,513000,2.9963,9.93,-7.42,174.84




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-10-27_04-12-18
  done: false
  episode_len_mean: 171.58
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.15730000000002
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 9
  episodes_total: 1973
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11716770936054664
          cur_lr: 5.000000000000001e-05
          entropy: 1.6552539653248257
          entropy_coeff: 0.009999999999999998
          kl: 0.009830383066959058
          policy_loss: -0.046159467101097106
          total_loss: -0.03433456996248828
          vf_explained_var: 0.8982795476913452
          vf_loss: 0.027225635535756333
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 51400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,514,25355.9,514000,3.1573,9.94,-7.42,171.58




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-10-27_04-13-14
  done: false
  episode_len_mean: 176.1
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.0528000000000195
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 5
  episodes_total: 1978
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11716770936054664
          cur_lr: 5.000000000000001e-05
          entropy: 1.8239097250832452
          entropy_coeff: 0.009999999999999998
          kl: 0.014715776629104147
          policy_loss: 0.015002118547757466
          total_loss: 0.015655133417911
          vf_explained_var: 0.3191334307193756
          vf_loss: 0.017167898752894768
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,515,25412.3,515000,3.0528,9.94,-7.42,176.1




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-10-27_04-15-10
  done: false
  episode_len_mean: 173.39
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.1430000000000193
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 7
  episodes_total: 1985
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11716770936054664
          cur_lr: 5.000000000000001e-05
          entropy: 2.0074107196595934
          entropy_coeff: 0.009999999999999998
          kl: 0.020457840514862685
          policy_loss: 0.01694389631350835
          total_loss: 0.10341583084728982
          vf_explained_var: 0.7995386123657227
          vf_loss: 0.10414904549510942
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 516000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,516,25527.4,516000,3.143,9.94,-7.42,173.39




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-10-27_04-16-44
  done: false
  episode_len_mean: 173.45
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.1610000000000196
  episode_reward_min: -7.41999999999992
  episodes_this_iter: 6
  episodes_total: 1991
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17575156404082
          cur_lr: 5.000000000000001e-05
          entropy: 1.9808706707424588
          entropy_coeff: 0.009999999999999998
          kl: 0.008071203643953594
          policy_loss: 0.21811735332012178
          total_loss: 0.2041392571396298
          vf_explained_var: 0.8349637389183044
          vf_loss: 0.004412082052375707
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,517,25621.8,517000,3.161,9.94,-7.42,173.45


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-10-27_04-17-07
  done: false
  episode_len_mean: 177.02
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 2.9718000000000195
  episode_reward_min: -9.929999999999936
  episodes_this_iter: 2
  episodes_total: 1993
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17575156404082
          cur_lr: 5.000000000000001e-05
          entropy: 1.8524173312717014
          entropy_coeff: 0.009999999999999998
          kl: 0.030196591494613283
          policy_loss: 0.07551500482691659
          total_loss: 0.4584478014873134
          vf_explained_var: 0.5816366672515869
          vf_loss: 0.39614986230929694
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,518,25644.6,518000,2.9718,9.94,-9.93,177.02




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-10-27_04-21-50
  done: false
  episode_len_mean: 160.26
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.508700000000017
  episode_reward_min: -9.929999999999936
  episodes_this_iter: 16
  episodes_total: 2009
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 1.7244225687450834
          entropy_coeff: 0.009999999999999998
          kl: 0.07533666259501891
          policy_loss: 0.08105358117156558
          total_loss: 0.6513787364794148
          vf_explained_var: 0.7926180362701416
          vf_loss: 0.5677085820171568
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,519,25927.8,519000,3.5087,9.94,-9.93,160.26




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-10-27_04-23-07
  done: false
  episode_len_mean: 155.6
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.603500000000017
  episode_reward_min: -9.929999999999936
  episodes_this_iter: 5
  episodes_total: 2014
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.39544101909184504
          cur_lr: 5.000000000000001e-05
          entropy: 2.011595768398709
          entropy_coeff: 0.009999999999999998
          kl: 0.01219745425230118
          policy_loss: 0.03952701220081912
          total_loss: 0.058812432611982025
          vf_explained_var: 0.5599509477615356
          vf_loss: 0.03457800517272618
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,520,26005.2,520000,3.6035,9.94,-9.93,155.6




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-10-27_04-25-24
  done: false
  episode_len_mean: 151.15
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.780000000000017
  episode_reward_min: -9.929999999999936
  episodes_this_iter: 8
  episodes_total: 2022
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.39544101909184504
          cur_lr: 5.000000000000001e-05
          entropy: 1.9168968266910977
          entropy_coeff: 0.009999999999999998
          kl: 0.009378479774427954
          policy_loss: -0.12268908553653293
          total_loss: -0.10075493794348504
          vf_explained_var: 0.9935054183006287
          vf_loss: 0.03739447996227278
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 52100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,521,26142,521000,3.78,9.94,-9.93,151.15




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-10-27_04-26-45
  done: false
  episode_len_mean: 150.72
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.806500000000016
  episode_reward_min: -9.929999999999936
  episodes_this_iter: 6
  episodes_total: 2028
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.39544101909184504
          cur_lr: 5.000000000000001e-05
          entropy: 2.19668882422977
          entropy_coeff: 0.009999999999999998
          kl: 0.016118189455191098
          policy_loss: -0.08935538588298692
          total_loss: 0.011137964824835459
          vf_explained_var: 0.3352469503879547
          vf_loss: 0.11608644628690348
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,522,26222.6,522000,3.8065,9.94,-9.93,150.72


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-10-27_04-27-08
  done: false
  episode_len_mean: 158.53
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.527400000000018
  episode_reward_min: -9.929999999999936
  episodes_this_iter: 2
  episodes_total: 2030
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.39544101909184504
          cur_lr: 5.000000000000001e-05
          entropy: 2.1514730758137173
          entropy_coeff: 0.009999999999999998
          kl: 0.010939596763854966
          policy_loss: -0.08786425747805171
          total_loss: -0.08375097894006306
          vf_explained_var: 0.12462367862462997
          vf_loss: 0.0213020462801473
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 52300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,523,26245.6,523000,3.5274,9.94,-9.93,158.53




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-10-27_04-28-26
  done: false
  episode_len_mean: 154.12
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.6969000000000163
  episode_reward_min: -9.929999999999936
  episodes_this_iter: 5
  episodes_total: 2035
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.39544101909184504
          cur_lr: 5.000000000000001e-05
          entropy: 2.0871734499931334
          entropy_coeff: 0.009999999999999998
          kl: 0.010172789666101422
          policy_loss: -0.04685737159517076
          total_loss: -0.05493523681329356
          vf_explained_var: 0.7680670022964478
          vf_loss: 0.008771131751644942
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,524,26323.4,524000,3.6969,9.94,-9.93,154.12


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-10-27_04-28-49
  done: false
  episode_len_mean: 161.55
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.313100000000016
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 3
  episodes_total: 2038
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.39544101909184504
          cur_lr: 5.000000000000001e-05
          entropy: 2.0095635771751406
          entropy_coeff: 0.009999999999999998
          kl: 0.034963817194390394
          policy_loss: 0.06522536642021604
          total_loss: 0.3497944337626298
          vf_explained_var: 0.5293348431587219
          vf_loss: 0.290838587594529
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,525,26346.2,525000,3.3131,9.94,-15.33,161.55




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-10-27_04-30-24
  done: false
  episode_len_mean: 162.92
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.246700000000016
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 6
  episodes_total: 2044
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5931615286377675
          cur_lr: 5.000000000000001e-05
          entropy: 1.9681620849503412
          entropy_coeff: 0.009999999999999998
          kl: 0.022626704894352763
          policy_loss: -0.007083997792667813
          total_loss: 0.2450926038953993
          vf_explained_var: 0.4891227185726166
          vf_loss: 0.25843693152483965
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,526,26441.4,526000,3.2467,9.94,-15.33,162.92




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-10-27_04-33-00
  done: false
  episode_len_mean: 159.69
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.371800000000016
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 9
  episodes_total: 2053
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 1.9966849353578355
          entropy_coeff: 0.009999999999999998
          kl: 0.008420528808466561
          policy_loss: 0.2473854010303815
          total_loss: 0.3641950367225541
          vf_explained_var: 0.7944587469100952
          vf_loss: 0.1292843824976848
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,527,26597.6,527000,3.3718,9.94,-15.33,159.69




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-10-27_04-35-36
  done: false
  episode_len_mean: 150.96
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.578900000000015
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 9
  episodes_total: 2062
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 1.830541565683153
          entropy_coeff: 0.009999999999999998
          kl: 0.01107100626843832
          policy_loss: 0.18575446986489827
          total_loss: 0.3436108370621999
          vf_explained_var: 0.890595018863678
          vf_loss: 0.16631143951995506
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,528,26753.3,528000,3.5789,9.94,-15.33,150.96




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-10-27_04-36-19
  done: false
  episode_len_mean: 157.37
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 3.3805000000000156
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 2
  episodes_total: 2064
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 2.0837163236406115
          entropy_coeff: 0.009999999999999998
          kl: 0.014812604156621568
          policy_loss: -0.061770896199676725
          total_loss: 0.27785166737933953
          vf_explained_var: 0.5733751654624939
          vf_loss: 0.3472803216841486
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 52900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,529,26796.7,529000,3.3805,9.94,-15.33,157.37




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-10-27_04-37-19
  done: false
  episode_len_mean: 164.03
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.0872000000000157
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 5
  episodes_total: 2069
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 2.1333385003937617
          entropy_coeff: 0.009999999999999998
          kl: 0.0057973531944269475
          policy_loss: 0.000211193785071373
          total_loss: 0.12549445368349552
          vf_explained_var: 0.6065760850906372
          vf_loss: 0.1414584918672012
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,530,26856.8,530000,3.0872,9.93,-15.33,164.03




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-10-27_04-39-15
  done: false
  episode_len_mean: 162.89
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.1196000000000152
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 7
  episodes_total: 2076
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 2.012925104300181
          entropy_coeff: 0.009999999999999998
          kl: 0.013758594458502277
          policy_loss: -0.057207885715696544
          total_loss: 0.07187291267845365
          vf_explained_var: 0.9199143052101135
          vf_loss: 0.13696844718522497
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,531,26973,531000,3.1196,9.93,-15.33,162.89




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-10-27_04-42-49
  done: false
  episode_len_mean: 150.19
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.350400000000014
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 12
  episodes_total: 2088
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 1.924218205610911
          entropy_coeff: 0.009999999999999998
          kl: 0.005806773553272058
          policy_loss: 0.101699415097634
          total_loss: 0.11384958202640215
          vf_explained_var: 0.9930328130722046
          vf_loss: 0.026225815589229266
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,532,27187,532000,3.3504,9.93,-15.33,150.19


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-10-27_04-43-15
  done: false
  episode_len_mean: 157.56
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.097500000000015
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 2
  episodes_total: 2090
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 2.0991914868354797
          entropy_coeff: 0.009999999999999998
          kl: 0.006075018769897156
          policy_loss: -0.14582690041926172
          total_loss: -0.06870388719770644
          vf_explained_var: 0.7890663146972656
          vf_loss: 0.09270972922547824
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,533,27212.4,533000,3.0975,9.93,-15.33,157.56




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-10-27_04-46-25
  done: false
  episode_len_mean: 152.63
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.165600000000014
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 12
  episodes_total: 2102
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 1.8737118257416618
          entropy_coeff: 0.009999999999999998
          kl: 0.007491613326009312
          policy_loss: -0.05133714994622601
          total_loss: 0.09231072730488248
          vf_explained_var: 0.9786083698272705
          vf_loss: 0.15571939177397226
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,534,27402.4,534000,3.1656,9.93,-15.33,152.63




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-10-27_04-50-35
  done: false
  episode_len_mean: 143.55
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.456400000000012
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 13
  episodes_total: 2115
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 1.7680168218082852
          entropy_coeff: 0.009999999999999998
          kl: 0.011055831648998272
          policy_loss: -0.07585188680224948
          total_loss: 0.009866791052950753
          vf_explained_var: 0.9548815488815308
          vf_loss: 0.09356200338289555
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,535,27652.2,535000,3.4564,9.93,-15.33,143.55




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-10-27_04-51-52
  done: false
  episode_len_mean: 152.26
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.1968000000000143
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 6
  episodes_total: 2121
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8897422929566512
          cur_lr: 5.000000000000001e-05
          entropy: 2.305041609870063
          entropy_coeff: 0.009999999999999998
          kl: 0.0042693765976729225
          policy_loss: 0.057140387925836776
          total_loss: 0.043224500368038814
          vf_explained_var: 0.7869454026222229
          vf_loss: 0.00533587995192243
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,536,27729,536000,3.1968,9.93,-15.33,152.26




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-10-27_04-54-51
  done: false
  episode_len_mean: 138.03
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.526600000000012
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 10
  episodes_total: 2131
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4448711464783256
          cur_lr: 5.000000000000001e-05
          entropy: 2.0260865887006125
          entropy_coeff: 0.009999999999999998
          kl: 0.002330278161648991
          policy_loss: -0.19074058102236854
          total_loss: -0.20464521365033256
          vf_explained_var: 0.9245277643203735
          vf_loss: 0.005319560435600579
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 537000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,537,27908.9,537000,3.5266,9.93,-15.33,138.03




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-10-27_04-55-31
  done: false
  episode_len_mean: 141.48
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.3920000000000123
  episode_reward_min: -15.330000000000043
  episodes_this_iter: 3
  episodes_total: 2134
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2224355732391628
          cur_lr: 5.000000000000001e-05
          entropy: 2.082663271162245
          entropy_coeff: 0.009999999999999998
          kl: 0.013314209984745378
          policy_loss: -0.020052724828322727
          total_loss: -0.024298028730683856
          vf_explained_var: 0.6553983092308044
          vf_loss: 0.013619778566579852
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 538000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,538,27948.1,538000,3.392,9.93,-15.33,141.48




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-10-27_04-58-06
  done: false
  episode_len_mean: 128.26
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.807400000000012
  episode_reward_min: -8.669999999999959
  episodes_this_iter: 9
  episodes_total: 2143
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2224355732391628
          cur_lr: 5.000000000000001e-05
          entropy: 1.8940320544772677
          entropy_coeff: 0.009999999999999998
          kl: 0.013094855093767304
          policy_loss: -0.08223340316779083
          total_loss: -0.06289265238576465
          vf_explained_var: 0.9778695702552795
          vf_loss: 0.03536831091365052
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 539000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,539,28103.8,539000,3.8074,9.93,-8.67,128.26




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-10-27_04-59-45
  done: false
  episode_len_mean: 127.47
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.799200000000012
  episode_reward_min: -8.669999999999959
  episodes_this_iter: 6
  episodes_total: 2149
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2224355732391628
          cur_lr: 5.000000000000001e-05
          entropy: 1.9521892680062187
          entropy_coeff: 0.009999999999999998
          kl: 0.008566637790584537
          policy_loss: -0.08413126253419452
          total_loss: -0.08925779188672701
          vf_explained_var: 0.9023393392562866
          vf_loss: 0.01248983648709125
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 540000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,540,28202.7,540000,3.7992,9.93,-8.67,127.47




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-10-27_05-02-40
  done: false
  episode_len_mean: 130.52
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.789000000000012
  episode_reward_min: -8.669999999999959
  episodes_this_iter: 11
  episodes_total: 2160
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2224355732391628
          cur_lr: 5.000000000000001e-05
          entropy: 1.935020515653822
          entropy_coeff: 0.009999999999999998
          kl: 0.0045516009227611525
          policy_loss: -0.16876012062033016
          total_loss: -0.15864361367291874
          vf_explained_var: 0.3271622359752655
          vf_loss: 0.028454274484991198
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,541,28377.8,541000,3.789,9.93,-8.67,130.52




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-10-27_05-03-27
  done: false
  episode_len_mean: 133.39
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.697800000000013
  episode_reward_min: -8.669999999999959
  episodes_this_iter: 3
  episodes_total: 2163
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1112177866195814
          cur_lr: 5.000000000000001e-05
          entropy: 2.110244768195682
          entropy_coeff: 0.009999999999999998
          kl: 0.011732917772159122
          policy_loss: -0.06492426449226009
          total_loss: -0.06794272571181258
          vf_explained_var: 0.8625754714012146
          vf_loss: 0.016779076799543366
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,542,28424.5,542000,3.6978,9.93,-8.67,133.39




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-10-27_05-04-27
  done: false
  episode_len_mean: 129.85
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.723000000000012
  episode_reward_min: -8.669999999999959
  episodes_this_iter: 5
  episodes_total: 2168
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1112177866195814
          cur_lr: 5.000000000000001e-05
          entropy: 2.108553198973338
          entropy_coeff: 0.009999999999999998
          kl: 0.012508493261690528
          policy_loss: -0.17353933428724608
          total_loss: -0.16234474008282027
          vf_explained_var: 0.8650817275047302
          vf_loss: 0.030888957913137144
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 543000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,543,28484.5,543000,3.723,9.93,-8.67,129.85




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-10-27_05-05-32
  done: false
  episode_len_mean: 134.27
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.6472000000000135
  episode_reward_min: -8.669999999999959
  episodes_this_iter: 4
  episodes_total: 2172
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1112177866195814
          cur_lr: 5.000000000000001e-05
          entropy: 1.946756276819441
          entropy_coeff: 0.009999999999999998
          kl: 0.01569661727529067
          policy_loss: -0.1356973591984974
          total_loss: -0.13028848566528822
          vf_explained_var: 0.773313045501709
          vf_loss: 0.023130693048652676
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,544,28549,544000,3.6472,9.93,-8.67,134.27


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-10-27_05-06-00
  done: false
  episode_len_mean: 140.39
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.4330000000000127
  episode_reward_min: -8.669999999999959
  episodes_this_iter: 3
  episodes_total: 2175
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1112177866195814
          cur_lr: 5.000000000000001e-05
          entropy: 2.1604598336749605
          entropy_coeff: 0.009999999999999998
          kl: 0.01653260701409384
          policy_loss: -0.08066957741975785
          total_loss: 0.021426494129829936
          vf_explained_var: 0.5749815106391907
          vf_loss: 0.12186195063922141
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,545,28576.8,545000,3.433,9.93,-8.67,140.39




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-10-27_05-06-59
  done: false
  episode_len_mean: 145.03
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 3.2395000000000125
  episode_reward_min: -8.669999999999959
  episodes_this_iter: 4
  episodes_total: 2179
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1112177866195814
          cur_lr: 5.000000000000001e-05
          entropy: 1.9474928193622165
          entropy_coeff: 0.009999999999999998
          kl: 0.15829596808995391
          policy_loss: 0.10896521359682083
          total_loss: 0.6863110806999935
          vf_explained_var: 0.2339855134487152
          vf_loss: 0.5792154600429866
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 546000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,546,28635.9,546000,3.2395,9.93,-8.67,145.03


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-10-27_05-07-25
  done: false
  episode_len_mean: 151.56
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.9085000000000134
  episode_reward_min: -10.88999999999994
  episodes_this_iter: 3
  episodes_total: 2182
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16682667992937208
          cur_lr: 5.000000000000001e-05
          entropy: 1.5992809467845492
          entropy_coeff: 0.009999999999999998
          kl: 0.025609078516729618
          policy_loss: -0.09681599042895768
          total_loss: 0.24893217210968335
          vf_explained_var: 0.3898363709449768
          vf_loss: 0.35746869266861014
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,547,28662.5,547000,2.9085,9.93,-10.89,151.56




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-10-27_05-08-22
  done: false
  episode_len_mean: 160.86
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.700300000000014
  episode_reward_min: -10.88999999999994
  episodes_this_iter: 3
  episodes_total: 2185
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25024001989405814
          cur_lr: 5.000000000000001e-05
          entropy: 1.3470121118757459
          entropy_coeff: 0.009999999999999998
          kl: 0.0179385791917318
          policy_loss: 0.07411421140034993
          total_loss: 0.45505568914943273
          vf_explained_var: 0.08124815672636032
          vf_loss: 0.3899226483371523
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 548000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,548,28719.2,548000,2.7003,9.93,-10.89,160.86


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-10-27_05-08-47
  done: false
  episode_len_mean: 168.03
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.5603000000000145
  episode_reward_min: -10.88999999999994
  episodes_this_iter: 2
  episodes_total: 2187
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25024001989405814
          cur_lr: 5.000000000000001e-05
          entropy: 0.957253157430225
          entropy_coeff: 0.009999999999999998
          kl: 0.008828058758579536
          policy_loss: 0.007836666661832067
          total_loss: 0.30146666011876533
          vf_explained_var: 0.6066108345985413
          vf_loss: 0.3009933873597119
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 549000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,549,28743.8,549000,2.5603,9.93,-10.89,168.03


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-10-27_05-09-06
  done: false
  episode_len_mean: 172.69
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.3535000000000155
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 2
  episodes_total: 2189
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25024001989405814
          cur_lr: 5.000000000000001e-05
          entropy: 1.7336464140150283
          entropy_coeff: 0.009999999999999998
          kl: 0.011844712609130227
          policy_loss: 0.06021096623606152
          total_loss: 0.2613750541375743
          vf_explained_var: 0.5413718819618225
          vf_loss: 0.2155365340411663
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 550000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,550,28763.4,550000,2.3535,9.93,-11.3,172.69


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-10-27_05-09-29
  done: false
  episode_len_mean: 174.98
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.4112000000000156
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 2
  episodes_total: 2191
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25024001989405814
          cur_lr: 5.000000000000001e-05
          entropy: 1.0685572604338327
          entropy_coeff: 0.009999999999999998
          kl: 0.008315816263132063
          policy_loss: 0.022891060676839617
          total_loss: 0.3797274465362231
          vf_explained_var: 0.3604948818683624
          vf_loss: 0.3654410027795368
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 551000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,551,28785.8,551000,2.4112,9.93,-11.3,174.98


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-10-27_05-09-49
  done: false
  episode_len_mean: 180.59
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 2.259300000000017
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 2
  episodes_total: 2193
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25024001989405814
          cur_lr: 5.000000000000001e-05
          entropy: 1.512510311603546
          entropy_coeff: 0.009999999999999998
          kl: 0.01538661279924922
          policy_loss: -0.04877232927829027
          total_loss: 0.41236048407024806
          vf_explained_var: 0.38597843050956726
          vf_loss: 0.47240757721786697
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,552,28806.3,552000,2.2593,9.93,-11.3,180.59


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-10-27_05-10-12
  done: false
  episode_len_mean: 194.59
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 1.883700000000018
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 3
  episodes_total: 2196
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25024001989405814
          cur_lr: 5.000000000000001e-05
          entropy: 1.3007833030488756
          entropy_coeff: 0.009999999999999998
          kl: 0.017884087586784524
          policy_loss: -0.05335258493820826
          total_loss: 0.2585846659209993
          vf_explained_var: 0.31926867365837097
          vf_loss: 0.32046976933876675
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,553,28829,553000,1.8837,9.93,-11.3,194.59




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-10-27_05-10-57
  done: false
  episode_len_mean: 202.92
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 1.6708000000000185
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 3
  episodes_total: 2199
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25024001989405814
          cur_lr: 5.000000000000001e-05
          entropy: 1.2627405994468266
          entropy_coeff: 0.009999999999999998
          kl: 0.01088137925731445
          policy_loss: -0.07392122476465172
          total_loss: 0.17148749633795685
          vf_explained_var: 0.15776848793029785
          vf_loss: 0.2553131716118919
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,554,28873.9,554000,1.6708,9.93,-11.3,202.92




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-10-27_05-11-53
  done: false
  episode_len_mean: 209.34
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 1.5951000000000193
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 3
  episodes_total: 2202
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25024001989405814
          cur_lr: 5.000000000000001e-05
          entropy: 1.8522265063391792
          entropy_coeff: 0.009999999999999998
          kl: 0.016663041686143156
          policy_loss: -0.13325514954825243
          total_loss: 0.6046662751171324
          vf_explained_var: 0.3817618191242218
          vf_loss: 0.7522739330927531
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 555000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,555,28930.6,555000,1.5951,9.93,-11.3,209.34




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-10-27_05-12-38
  done: false
  episode_len_mean: 217.0
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 1.4353000000000193
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 3
  episodes_total: 2205
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25024001989405814
          cur_lr: 5.000000000000001e-05
          entropy: 1.1276059514946408
          entropy_coeff: 0.009999999999999998
          kl: 0.020473874582587728
          policy_loss: 0.09370189276006487
          total_loss: 0.6770645532343122
          vf_explained_var: 0.528502345085144
          vf_loss: 0.5895153360234366
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 556000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,556,28974.9,556000,1.4353,9.93,-11.3,217




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-10-27_05-13-39
  done: false
  episode_len_mean: 223.58
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 1.15570000000002
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 3
  episodes_total: 2208
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3753600298410872
          cur_lr: 5.000000000000001e-05
          entropy: 1.8608987477090624
          entropy_coeff: 0.009999999999999998
          kl: 0.016937505832339373
          policy_loss: -0.0676473417215877
          total_loss: 0.17247656418217552
          vf_explained_var: 0.6033308506011963
          vf_loss: 0.2523752324283123
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,557,29036,557000,1.1557,9.93,-11.3,223.58




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-10-27_05-14-41
  done: false
  episode_len_mean: 232.94
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.9328000000000205
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 4
  episodes_total: 2212
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3753600298410872
          cur_lr: 5.000000000000001e-05
          entropy: 1.051316973235872
          entropy_coeff: 0.009999999999999998
          kl: 0.01899538220856757
          policy_loss: 0.11442149927218755
          total_loss: 0.6971673739453157
          vf_explained_var: 0.6600651144981384
          vf_loss: 0.5861289358801312
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 558000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,558,29097.9,558000,0.9328,9.93,-11.3,232.94




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-10-27_05-16-00
  done: false
  episode_len_mean: 236.62
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.883600000000021
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 4
  episodes_total: 2216
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3753600298410872
          cur_lr: 5.000000000000001e-05
          entropy: 1.6692178434795804
          entropy_coeff: 0.009999999999999998
          kl: 0.02159080139915503
          policy_loss: -0.067451301879353
          total_loss: 0.4002081340385808
          vf_explained_var: 0.6645035743713379
          vf_loss: 0.47624728580315906
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 559000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,559,29177.3,559000,0.8836,9.93,-11.3,236.62




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-10-27_05-17-03
  done: false
  episode_len_mean: 238.33
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.7939000000000209
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 4
  episodes_total: 2220
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5630400447616307
          cur_lr: 5.000000000000001e-05
          entropy: 1.3687485893567404
          entropy_coeff: 0.009999999999999998
          kl: 0.0171203431122521
          policy_loss: 0.04552156759632958
          total_loss: 0.4915357554952304
          vf_explained_var: 0.6390172243118286
          vf_loss: 0.4500622335407469
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 560000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,560,29240.2,560000,0.7939,9.93,-11.3,238.33




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-10-27_05-18-01
  done: false
  episode_len_mean: 241.66
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.6722000000000216
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 4
  episodes_total: 2224
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5630400447616307
          cur_lr: 5.000000000000001e-05
          entropy: 2.1011491338411967
          entropy_coeff: 0.009999999999999998
          kl: 0.008954741569809688
          policy_loss: 0.19841546333498425
          total_loss: 0.3815595279137293
          vf_explained_var: 0.30361202359199524
          vf_loss: 0.19911367238188785
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 561000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,561,29297.6,561000,0.6722,9.93,-11.3,241.66




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-10-27_05-18-37
  done: false
  episode_len_mean: 247.31
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.5181000000000219
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 1
  episodes_total: 2225
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5630400447616307
          cur_lr: 5.000000000000001e-05
          entropy: 1.6894480347633363
          entropy_coeff: 0.009999999999999998
          kl: 0.014492012192405883
          policy_loss: 0.06751319865385691
          total_loss: 0.1982758426003986
          vf_explained_var: 0.3494747579097748
          vf_loss: 0.13949753809720278
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,562,29334.4,562000,0.5181,9.93,-11.3,247.31




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-10-27_05-20-36
  done: false
  episode_len_mean: 253.8
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.48510000000002246
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 7
  episodes_total: 2232
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5630400447616307
          cur_lr: 5.000000000000001e-05
          entropy: 1.4455244700113932
          entropy_coeff: 0.009999999999999998
          kl: 0.025327025227685675
          policy_loss: -0.07330302769939105
          total_loss: 0.8058356884039111
          vf_explained_var: 0.6155346035957336
          vf_loss: 0.8793338331911299
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,563,29452.6,563000,0.4851,9.93,-11.3,253.8




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-10-27_05-23-09
  done: false
  episode_len_mean: 256.7
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.48530000000002216
  episode_reward_min: -11.299999999999942
  episodes_this_iter: 8
  episodes_total: 2240
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.5658951428201464
          entropy_coeff: 0.009999999999999998
          kl: 0.013555305200304938
          policy_loss: 0.03623309797710843
          total_loss: 0.8332809229691823
          vf_explained_var: 0.520719587802887
          vf_loss: 0.8012584997547998
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 564000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,564,29605.6,564000,0.4853,9.93,-11.3,256.7


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-10-27_05-23-31
  done: false
  episode_len_mean: 261.32
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.2637000000000228
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2242
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.7307927846908568
          entropy_coeff: 0.009999999999999998
          kl: 0.01736778693686802
          policy_loss: 0.015662220120429993
          total_loss: 0.5717601516180568
          vf_explained_var: 0.29871666431427
          vf_loss: 0.5587377241916127
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 565000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,565,29628.1,565000,0.2637,9.93,-13.76,261.32




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-10-27_05-26-02
  done: false
  episode_len_mean: 263.38
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.29860000000002196
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 7
  episodes_total: 2249
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.6633813169267442
          entropy_coeff: 0.009999999999999998
          kl: 0.012018116369619096
          policy_loss: 0.04982167267137104
          total_loss: 0.7022027158074908
          vf_explained_var: 0.5828573703765869
          vf_loss: 0.6588648325867124
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 566000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,566,29779.2,566000,0.2986,9.93,-13.76,263.38




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-10-27_05-26-42
  done: false
  episode_len_mean: 266.24
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.25240000000002244
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2251
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.8640348725848728
          entropy_coeff: 0.009999999999999998
          kl: 0.005094532568217403
          policy_loss: -0.12375483906103504
          total_loss: 0.06422825675043795
          vf_explained_var: 0.5155735611915588
          vf_loss: 0.2023208071788152
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,567,29819.1,567000,0.2524,9.93,-13.76,266.24




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-10-27_05-27-17
  done: false
  episode_len_mean: 273.72
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 0.11200000000002332
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2253
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.3867952565352122
          entropy_coeff: 0.009999999999999998
          kl: 0.0025393703031325325
          policy_loss: 0.05266930133932167
          total_loss: 0.17983219251036645
          vf_explained_var: 0.439062237739563
          vf_loss: 0.13888619432432783
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 568000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,568,29853.9,568000,0.112,9.93,-13.76,273.72




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-10-27_05-27-57
  done: false
  episode_len_mean: 284.52
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: -0.2419999999999756
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2256
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.5569517374038697
          entropy_coeff: 0.009999999999999998
          kl: 0.011841841423158467
          policy_loss: 0.03004824924800131
          total_loss: 0.25324083910220196
          vf_explained_var: 0.6390843987464905
          vf_loss: 0.23376153401202626
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,569,29893.9,569000,-0.242,9.92,-13.76,284.52




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-10-27_05-28-53
  done: false
  episode_len_mean: 289.39
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: -0.3344999999999749
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2259
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.670816671848297
          entropy_coeff: 0.009999999999999998
          kl: 0.00931542588027935
          policy_loss: -0.08885085334380467
          total_loss: 0.20702946436487965
          vf_explained_var: 0.6717248558998108
          vf_loss: 0.3086547660330931
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,570,29949.6,570000,-0.3345,9.92,-13.76,289.39




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-10-27_05-29-31
  done: false
  episode_len_mean: 295.57
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.46779999999997424
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2262
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.566180588139428
          entropy_coeff: 0.009999999999999998
          kl: 0.016033919273945912
          policy_loss: 0.05714340549376276
          total_loss: 0.45188258008824456
          vf_explained_var: 0.3485754728317261
          vf_loss: 0.40363017751110924
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,571,29987.8,571000,-0.4678,9.8,-13.76,295.57


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-10-27_05-29-50
  done: false
  episode_len_mean: 297.49
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.5299999999999738
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 1
  episodes_total: 2263
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.559641538725959
          entropy_coeff: 0.009999999999999998
          kl: 0.012463595850695085
          policy_loss: 0.025308118305272527
          total_loss: 0.31656204482747446
          vf_explained_var: 0.48750126361846924
          vf_loss: 0.3015872172183461
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 572000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,572,30007.2,572000,-0.53,9.8,-13.76,297.49




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-10-27_05-31-45
  done: false
  episode_len_mean: 292.99
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.22599999999997486
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 7
  episodes_total: 2270
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.2577415757709078
          entropy_coeff: 0.009999999999999998
          kl: 0.013148085558293054
          policy_loss: 0.14603231698274613
          total_loss: 0.6323247689339849
          vf_explained_var: 0.5231624841690063
          vf_loss: 0.4933176875114441
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 573000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,573,30121.4,573000,-0.226,9.8,-13.76,292.99


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-10-27_05-32-08
  done: false
  episode_len_mean: 301.91
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.4280999999999737
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2272
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.469133616818322
          entropy_coeff: 0.009999999999999998
          kl: 0.014222135428917405
          policy_loss: 0.05880148344569736
          total_loss: 0.5670811134907935
          vf_explained_var: 0.5121554732322693
          vf_loss: 0.5169652428891923
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 574000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,574,30144.3,574000,-0.4281,9.8,-13.76,301.91


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-10-27_05-32-27
  done: false
  episode_len_mean: 304.75
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.38899999999997353
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2274
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.900863328244951
          entropy_coeff: 0.009999999999999998
          kl: 0.01129234503610961
          policy_loss: -0.04051348807083236
          total_loss: 0.4308162107856737
          vf_explained_var: 0.19748352468013763
          vf_loss: 0.48556980142990747
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 575000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,575,30163.8,575000,-0.389,9.8,-13.76,304.75


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-10-27_05-32-44
  done: false
  episode_len_mean: 307.31
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.3966999999999728
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 1
  episodes_total: 2275
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.6463931494288975
          entropy_coeff: 0.009999999999999998
          kl: 0.008894736592484884
          policy_loss: -0.003208655243118604
          total_loss: 0.2729764289326138
          vf_explained_var: 0.3653833568096161
          vf_loss: 0.2888929428325759
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 576000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,576,30180.8,576000,-0.3967,9.8,-13.76,307.31




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-10-27_05-33-19
  done: false
  episode_len_mean: 309.95
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.3980999999999718
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2277
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.41804352733824
          entropy_coeff: 0.009999999999999998
          kl: 0.009258126631204563
          policy_loss: -0.026155369356274605
          total_loss: 0.5900063355763753
          vf_explained_var: 0.30996087193489075
          vf_loss: 0.6264326161808438
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 577000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,577,30215.8,577000,-0.3981,9.8,-13.76,309.95


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-10-27_05-33-38
  done: false
  episode_len_mean: 320.53
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.5775999999999708
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2279
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.1395279718769922
          entropy_coeff: 0.009999999999999998
          kl: 0.011582665334582456
          policy_loss: 0.002344273527463277
          total_loss: 0.41977949548098775
          vf_explained_var: 0.5480680465698242
          vf_loss: 0.4239393777317471
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 578000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,578,30234.4,578000,-0.5776,9.8,-13.76,320.53




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-10-27_05-34-30
  done: false
  episode_len_mean: 319.38
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.4154999999999713
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2282
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.2713834702968598
          entropy_coeff: 0.009999999999999998
          kl: 0.016612134138558884
          policy_loss: 0.10820640731188987
          total_loss: 0.6105923033422894
          vf_explained_var: 0.2509578764438629
          vf_loss: 0.508084752327866
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 579000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,579,30286.9,579000,-0.4155,9.8,-13.76,319.38


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-10-27_05-34-49
  done: false
  episode_len_mean: 320.47
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.36629999999997154
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 1
  episodes_total: 2283
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.3789465566476187
          entropy_coeff: 0.009999999999999998
          kl: 0.014671842997326963
          policy_loss: 0.028985311463475227
          total_loss: 0.46809294985400307
          vf_explained_var: 0.33525168895721436
          vf_loss: 0.4467014806138145
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 580000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,580,30305.5,580000,-0.3663,9.8,-13.76,320.47


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-10-27_05-35-07
  done: false
  episode_len_mean: 327.65
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.5225999999999701
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2285
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.633412981033325
          entropy_coeff: 0.009999999999999998
          kl: 0.012348986488526571
          policy_loss: 0.1313946106367641
          total_loss: 0.3263186970518695
          vf_explained_var: 0.637689471244812
          vf_loss: 0.20604348302715356
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 581000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,581,30323.4,581000,-0.5226,9.8,-13.76,327.65


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-10-27_05-35-23
  done: false
  episode_len_mean: 332.74
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.5918999999999691
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2287
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.414782296286689
          entropy_coeff: 0.009999999999999998
          kl: 0.013225435704642156
          policy_loss: -0.14983304796947372
          total_loss: 0.11633916935986943
          vf_explained_var: 0.5488587021827698
          vf_loss: 0.27473520172966853
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 582000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,582,30339.1,582000,-0.5919,9.8,-13.76,332.74




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-10-27_05-36-17
  done: false
  episode_len_mean: 326.03
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.35289999999996996
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2290
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42228003357122307
          cur_lr: 5.000000000000001e-05
          entropy: 1.5100896782345241
          entropy_coeff: 0.009999999999999998
          kl: 0.03665168824514033
          policy_loss: -0.016197632915443846
          total_loss: 0.42362735577755506
          vf_explained_var: 0.30663245916366577
          vf_loss: 0.43944860729906293
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 583000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,583,30393.1,583000,-0.3529,9.8,-13.76,326.03




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-10-27_05-36-51
  done: false
  episode_len_mean: 326.85
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.3092999999999702
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 1
  episodes_total: 2291
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6334200503568346
          cur_lr: 5.000000000000001e-05
          entropy: 1.0657227734724681
          entropy_coeff: 0.009999999999999998
          kl: 0.008565682645701086
          policy_loss: -0.04816197488043043
          total_loss: 0.3254573944542143
          vf_explained_var: 0.5764315724372864
          vf_loss: 0.37885092033280265
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,584,30427.6,584000,-0.3093,9.8,-13.76,326.85


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-10-27_05-37-09
  done: false
  episode_len_mean: 330.68
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.3264999999999699
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2293
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6334200503568346
          cur_lr: 5.000000000000001e-05
          entropy: 1.8467044644885593
          entropy_coeff: 0.009999999999999998
          kl: 0.025260001156848903
          policy_loss: 0.002177796016136805
          total_loss: 0.5623498409986496
          vf_explained_var: 0.37550175189971924
          vf_loss: 0.5626388927300771
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 585000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,585,30445.4,585000,-0.3265,9.8,-13.76,330.68




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-10-27_05-37-45
  done: false
  episode_len_mean: 329.79
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.23529999999997017
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2296
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9501300755352523
          cur_lr: 5.000000000000001e-05
          entropy: 1.4185053706169128
          entropy_coeff: 0.009999999999999998
          kl: 0.009509567628814252
          policy_loss: -0.04723807420167658
          total_loss: 0.35906266503863865
          vf_explained_var: 0.6183658838272095
          vf_loss: 0.41145046469238067
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 586000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,586,30481.9,586000,-0.2353,9.8,-13.76,329.79


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-10-27_05-38-00
  done: false
  episode_len_mean: 331.95
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.23529999999996942
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 1
  episodes_total: 2297
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9501300755352523
          cur_lr: 5.000000000000001e-05
          entropy: 1.4544795950253804
          entropy_coeff: 0.009999999999999998
          kl: 0.0076173238888612915
          policy_loss: 0.05213591274287965
          total_loss: 0.3145394174589051
          vf_explained_var: 0.5022387504577637
          vf_loss: 0.2697108483976788
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 587000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,587,30496.9,587000,-0.2353,9.8,-13.76,331.95




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-10-27_05-39-29
  done: false
  episode_len_mean: 330.28
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.17909999999996934
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 4
  episodes_total: 2301
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9501300755352523
          cur_lr: 5.000000000000001e-05
          entropy: 1.2648780173725551
          entropy_coeff: 0.009999999999999998
          kl: 0.016468080176678182
          policy_loss: -0.05963662829664018
          total_loss: 0.6231868921054734
          vf_explained_var: 0.6079559922218323
          vf_loss: 0.6798254804478752
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 588000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,588,30585.1,588000,-0.1791,9.8,-13.76,330.28




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-10-27_05-40-08
  done: false
  episode_len_mean: 332.37
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.11259999999996896
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2304
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9501300755352523
          cur_lr: 5.000000000000001e-05
          entropy: 1.3398890124426948
          entropy_coeff: 0.009999999999999998
          kl: 0.020537923606161593
          policy_loss: -0.023931100592017175
          total_loss: 0.4888970888323254
          vf_explained_var: 0.5198366641998291
          vf_loss: 0.5067133832308981
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 589000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,589,30624.6,589000,-0.1126,9.8,-13.76,332.37


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-10-27_05-40-25
  done: false
  episode_len_mean: 336.5
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.25299999999996825
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 1
  episodes_total: 2305
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4251951133028784
          cur_lr: 5.000000000000001e-05
          entropy: 1.6463233960999384
          entropy_coeff: 0.009999999999999998
          kl: 0.007072563954334833
          policy_loss: -0.20123240053653718
          total_loss: -0.09057083138161236
          vf_explained_var: 0.6440590620040894
          vf_loss: 0.11704501724905438
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 590000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,590,30641.2,590000,-0.253,9.8,-13.76,336.5




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-10-27_05-41-15
  done: false
  episode_len_mean: 344.31
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.41259999999996694
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2308
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4251951133028784
          cur_lr: 5.000000000000001e-05
          entropy: 1.0035500407218934
          entropy_coeff: 0.009999999999999998
          kl: 0.008925643053297868
          policy_loss: 0.046377541290389165
          total_loss: 0.3564421555234326
          vf_explained_var: 0.5088234543800354
          vf_loss: 0.30737933615843455
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 591000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,591,30691.7,591000,-0.4126,9.8,-13.76,344.31




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-10-27_05-41-52
  done: false
  episode_len_mean: 343.9
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.3349999999999669
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2310
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4251951133028784
          cur_lr: 5.000000000000001e-05
          entropy: 1.9850164572397868
          entropy_coeff: 0.009999999999999998
          kl: 0.006862868047196357
          policy_loss: -0.14189068381157185
          total_loss: 0.08603820157133871
          vf_explained_var: 0.24128331243991852
          vf_loss: 0.23799812821671368
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 592000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,592,30728.1,592000,-0.335,9.8,-13.76,343.9




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-10-27_05-42-27
  done: false
  episode_len_mean: 349.93
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.4886999999999658
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2312
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4251951133028784
          cur_lr: 5.000000000000001e-05
          entropy: 1.72864611281289
          entropy_coeff: 0.009999999999999998
          kl: 0.00596946287344201
          policy_loss: -0.025913631393470696
          total_loss: 0.10397277297452093
          vf_explained_var: 0.044285841286182404
          vf_loss: 0.13866522222136457
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 593000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,593,30763.7,593000,-0.4887,9.8,-13.76,349.93




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-10-27_05-43-18
  done: false
  episode_len_mean: 351.5
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.39809999999996576
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2315
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4251951133028784
          cur_lr: 5.000000000000001e-05
          entropy: 1.281593644618988
          entropy_coeff: 0.009999999999999998
          kl: 0.01062394233974093
          policy_loss: -0.19360523575709926
          total_loss: 0.15762551890479193
          vf_explained_var: 0.46093180775642395
          vf_loss: 0.34890550143188903
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 594000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,594,30814.6,594000,-0.3981,9.8,-13.76,351.5


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-10-27_05-43-37
  done: false
  episode_len_mean: 357.51
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.5291999999999646
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2317
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4251951133028784
          cur_lr: 5.000000000000001e-05
          entropy: 1.8477844013108147
          entropy_coeff: 0.009999999999999998
          kl: 0.006181802648712035
          policy_loss: 0.002918201560775439
          total_loss: 0.2664471009539233
          vf_explained_var: 0.5028960108757019
          vf_loss: 0.2731964749180608
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 595000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,595,30832.8,595000,-0.5292,9.8,-13.76,357.51




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-10-27_05-44-48
  done: false
  episode_len_mean: 354.65
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.44179999999996483
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 4
  episodes_total: 2321
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4251951133028784
          cur_lr: 5.000000000000001e-05
          entropy: 1.494267914030287
          entropy_coeff: 0.009999999999999998
          kl: 0.025942418443582443
          policy_loss: -0.018537209348546135
          total_loss: 0.7732614702648587
          vf_explained_var: 0.6853718757629395
          vf_loss: 0.7697683433691661
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 596000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,596,30903.9,596000,-0.4418,9.8,-13.76,354.65




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-10-27_05-45-21
  done: false
  episode_len_mean: 356.29
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.41609999999996483
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2323
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.137792669954316
          cur_lr: 5.000000000000001e-05
          entropy: 1.5060768855942621
          entropy_coeff: 0.009999999999999998
          kl: 0.0059313555857297105
          policy_loss: -0.11372532095346186
          total_loss: 0.11428670038779577
          vf_explained_var: 0.5019403696060181
          vf_loss: 0.23039278458389972
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 597000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,597,30937.2,597000,-0.4161,9.8,-13.76,356.29




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-10-27_05-46-15
  done: false
  episode_len_mean: 358.91
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.388399999999965
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2326
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.137792669954316
          cur_lr: 5.000000000000001e-05
          entropy: 1.5024325887362162
          entropy_coeff: 0.009999999999999998
          kl: 0.007265660424839851
          policy_loss: -0.09252629520164596
          total_loss: 0.290413236969875
          vf_explained_var: 0.6775919795036316
          vf_loss: 0.38243138417601585
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,598,30991.1,598000,-0.3884,9.8,-13.76,358.91




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-10-27_05-46-50
  done: false
  episode_len_mean: 364.46
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.5504999999999642
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 2
  episodes_total: 2328
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.137792669954316
          cur_lr: 5.000000000000001e-05
          entropy: 1.5937271661228605
          entropy_coeff: 0.009999999999999998
          kl: 0.008654188052070803
          policy_loss: -0.09283769792980617
          total_loss: 0.2145633030268881
          vf_explained_var: 0.5908175706863403
          vf_loss: 0.30483741416699356
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 599000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,599,31026.4,599000,-0.5505,9.8,-13.76,364.46




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-10-27_05-47-25
  done: false
  episode_len_mean: 373.26
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.8780999999999622
  episode_reward_min: -13.759999999999936
  episodes_this_iter: 3
  episodes_total: 2331
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.137792669954316
          cur_lr: 5.000000000000001e-05
          entropy: 1.5885991626315648
          entropy_coeff: 0.009999999999999998
          kl: 0.008205254032990142
          policy_loss: 0.11196058516701063
          total_loss: 0.3201725276807944
          vf_explained_var: 0.5549613237380981
          vf_loss: 0.20655680058730974
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,600,31060.9,600000,-0.8781,9.8,-13.76,373.26


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-10-27_05-47-42
  done: false
  episode_len_mean: 379.39
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -1.077199999999961
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2332
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.137792669954316
          cur_lr: 5.000000000000001e-05
          entropy: 1.5875556959046258
          entropy_coeff: 0.009999999999999998
          kl: 0.0054056073105803
          policy_loss: -0.06587851407627264
          total_loss: 0.10517630154887835
          vf_explained_var: 0.5184046626091003
          vf_loss: 0.17537430516547628
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 601000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,601,31078,601000,-1.0772,9.8,-14.58,379.39


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-10-27_05-47-57
  done: false
  episode_len_mean: 380.41
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -1.1084999999999605
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2333
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.137792669954316
          cur_lr: 5.000000000000001e-05
          entropy: 1.0767623055312368
          entropy_coeff: 0.009999999999999998
          kl: 0.0030288133027250395
          policy_loss: -0.037264755968418384
          total_loss: 0.21659303520702652
          vf_explained_var: 0.3228014409542084
          vf_loss: 0.2581504406614436
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 602000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,602,31092.7,602000,-1.1085,9.8,-14.58,380.41




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-10-27_05-48-30
  done: false
  episode_len_mean: 392.01
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -1.4454999999999578
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 3
  episodes_total: 2336
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.068896334977158
          cur_lr: 5.000000000000001e-05
          entropy: 1.5440449039141337
          entropy_coeff: 0.009999999999999998
          kl: 0.00972691521847019
          policy_loss: -0.04432283569541243
          total_loss: 0.2139647088944912
          vf_explained_var: 0.4968821704387665
          vf_loss: 0.26333093303773136
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 603000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,603,31126.1,603000,-1.4455,9.66,-14.58,392.01


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-10-27_05-48-47
  done: false
  episode_len_mean: 396.79
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -1.6300999999999568
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2337
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.068896334977158
          cur_lr: 5.000000000000001e-05
          entropy: 1.8573942793740166
          entropy_coeff: 0.009999999999999998
          kl: 0.0074340156395572244
          policy_loss: 0.06323893976708253
          total_loss: 0.23617343393464882
          vf_explained_var: 0.4389694333076477
          vf_loss: 0.18356224008732372
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 60400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,604,31143.3,604000,-1.6301,9.66,-14.58,396.79


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-10-27_05-49-03
  done: false
  episode_len_mean: 408.13
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -1.9009999999999556
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2339
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.068896334977158
          cur_lr: 5.000000000000001e-05
          entropy: 1.7468896481725904
          entropy_coeff: 0.009999999999999998
          kl: 0.013362155741033159
          policy_loss: 0.012199472553200192
          total_loss: 0.1576484702527523
          vf_explained_var: 0.6237393617630005
          vf_loss: 0.14863513567381434
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 605000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,605,31159,605000,-1.901,9.66,-14.58,408.13


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-10-27_05-49-21
  done: false
  episode_len_mean: 415.05
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -1.9995999999999554
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2341
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.068896334977158
          cur_lr: 5.000000000000001e-05
          entropy: 1.702115571498871
          entropy_coeff: 0.009999999999999998
          kl: 0.011051302175071599
          policy_loss: 0.05271272568239106
          total_loss: 0.39383805348641343
          vf_explained_var: 0.6382012367248535
          vf_loss: 0.3463337867986411
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 606000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,606,31176.6,606000,-1.9996,9.66,-14.58,415.05




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-10-27_05-49-59
  done: false
  episode_len_mean: 415.36
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -1.873299999999955
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2343
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.068896334977158
          cur_lr: 5.000000000000001e-05
          entropy: 1.8068928122520447
          entropy_coeff: 0.009999999999999998
          kl: 0.07407995085077258
          policy_loss: 0.03545822906825277
          total_loss: 0.9334858742853006
          vf_explained_var: 0.4365668296813965
          vf_loss: 0.8369127888232469
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 607000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,607,31214.6,607000,-1.8733,9.66,-14.58,415.36


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-10-27_05-50-15
  done: false
  episode_len_mean: 427.2
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.1006999999999536
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2345
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.603344502465738
          cur_lr: 5.000000000000001e-05
          entropy: 1.748195430967543
          entropy_coeff: 0.009999999999999998
          kl: 0.0046105797751758125
          policy_loss: 0.03649691674444411
          total_loss: 0.27617392672432794
          vf_explained_var: 0.24292869865894318
          vf_loss: 0.2497666155712472
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 608000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,608,31231.4,608000,-2.1007,9.66,-14.58,427.2


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-10-27_05-50-31
  done: false
  episode_len_mean: 433.02
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.1779999999999533
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2346
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.9572016543812223
          entropy_coeff: 0.009999999999999998
          kl: 0.00784290595211724
          policy_loss: 0.038628637376758784
          total_loss: 0.22899287930793233
          vf_explained_var: 0.4911979138851166
          vf_loss: 0.20364881532473697
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 609000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,609,31247.3,609000,-2.178,9.66,-14.58,433.02


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-10-27_05-50-51
  done: false
  episode_len_mean: 437.43
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.303099999999952
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2348
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.74510018825531
          entropy_coeff: 0.009999999999999998
          kl: 0.011894580035640069
          policy_loss: -0.073771039603485
          total_loss: 0.3185903441367878
          vf_explained_var: 0.557884931564331
          vf_loss: 0.4002768299645848
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 610000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,610,31266.7,610000,-2.3031,9.66,-14.58,437.43




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-10-27_05-51-25
  done: false
  episode_len_mean: 443.28
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.4406999999999512
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 3
  episodes_total: 2351
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.739174066649543
          entropy_coeff: 0.009999999999999998
          kl: 0.009642718258888827
          policy_loss: -0.008533219910330243
          total_loss: 0.3841719619515869
          vf_explained_var: 0.5222167372703552
          vf_loss: 0.4023666228271193
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 611000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,611,31300.5,611000,-2.4407,9.66,-14.58,443.28


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-10-27_05-51-40
  done: false
  episode_len_mean: 443.55
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.415999999999951
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2352
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.9848327808909947
          entropy_coeff: 0.009999999999999998
          kl: 0.005581748099246909
          policy_loss: -0.07723879449897342
          total_loss: -0.06299261053403218
          vf_explained_var: 0.5775437951087952
          vf_loss: 0.029619780578650536
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 6120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,612,31316,612000,-2.416,9.66,-14.58,443.55


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-10-27_05-51-55
  done: false
  episode_len_mean: 447.16
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.5254999999999503
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2354
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.4746800985601214
          entropy_coeff: 0.009999999999999998
          kl: 0.005322609195119396
          policy_loss: 0.09323088808192147
          total_loss: 0.18276887966526878
          vf_explained_var: 0.4919469654560089
          vf_loss: 0.10001781214701219
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 613000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,613,31330.9,613000,-2.5255,9.66,-14.58,447.16




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-10-27_05-52-31
  done: false
  episode_len_mean: 446.0
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.436199999999951
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2356
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 2.225720689031813
          entropy_coeff: 0.009999999999999998
          kl: 0.008089829271609833
          policy_loss: 0.06389586478471757
          total_loss: 0.19208507868978714
          vf_explained_var: 0.29072675108909607
          vf_loss: 0.14396102930315666
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 614000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,614,31367,614000,-2.4362,9.66,-14.58,446




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-10-27_05-53-07
  done: false
  episode_len_mean: 447.86
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.4924999999999504
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 3
  episodes_total: 2359
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 2.168194532394409
          entropy_coeff: 0.009999999999999998
          kl: 0.006027051056467138
          policy_loss: -0.2131381054305368
          total_loss: -0.1392909180579914
          vf_explained_var: 0.5593042373657227
          vf_loss: 0.09069741200655698
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,615,31402.3,615000,-2.4925,9.66,-14.58,447.86




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-10-27_05-53-45
  done: false
  episode_len_mean: 450.59
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.53759999999995
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 3
  episodes_total: 2362
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 2.0311781578593786
          entropy_coeff: 0.009999999999999998
          kl: 0.008580046667639202
          policy_loss: 0.008659511059522628
          total_loss: 0.28687130726046034
          vf_explained_var: 0.369459331035614
          vf_loss: 0.29164518867101935
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,616,31440.5,616000,-2.5376,9.66,-14.58,450.59


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-10-27_05-54-05
  done: false
  episode_len_mean: 447.15
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.4307999999999508
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2364
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.964883397685157
          entropy_coeff: 0.009999999999999998
          kl: 0.009653807075379502
          policy_loss: 0.021152372078763113
          total_loss: 0.2174762312736776
          vf_explained_var: 0.3630954623222351
          vf_loss: 0.20823350693616602
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 617000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,617,31461.2,617000,-2.4308,9.66,-14.58,447.15


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-10-27_05-54-22
  done: false
  episode_len_mean: 456.35
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.6520999999999497
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2366
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.8823073625564575
          entropy_coeff: 0.009999999999999998
          kl: 0.00841470911425242
          policy_loss: -0.10889491008387672
          total_loss: 0.18004769566986295
          vf_explained_var: 0.3995082974433899
          vf_loss: 0.30101983849373126
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 618000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,618,31478.2,618000,-2.6521,9.66,-14.58,456.35


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-10-27_05-54-39
  done: false
  episode_len_mean: 459.2
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.694299999999949
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2367
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.9493513955010309
          entropy_coeff: 0.009999999999999998
          kl: 0.00527589645135254
          policy_loss: 0.13158571061988672
          total_loss: 0.13762562562608058
          vf_explained_var: 0.558136522769928
          vf_loss: 0.02130388851898412
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 619000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,619,31494.7,619000,-2.6943,9.66,-14.58,459.2




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-10-27_05-55-14
  done: false
  episode_len_mean: 468.74
  episode_media: {}
  episode_reward_max: 9.660000000000004
  episode_reward_mean: -2.8984999999999492
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2369
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.7857708228958977
          entropy_coeff: 0.009999999999999998
          kl: 0.008119661068884515
          policy_loss: 0.17385137072867818
          total_loss: 0.3484636666874091
          vf_explained_var: 0.5943260788917542
          vf_loss: 0.18596069949368635
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,620,31529.2,620000,-2.8985,9.66,-14.58,468.74


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-10-27_05-55-33
  done: false
  episode_len_mean: 473.83
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.003499999999948
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2371
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 2.024201358689202
          entropy_coeff: 0.009999999999999998
          kl: 0.007511509399751482
          policy_loss: -0.08068852341837353
          total_loss: -0.020905437817176183
          vf_explained_var: 0.21674686670303345
          vf_loss: 0.07400333437447747
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 6210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,621,31548.9,621000,-3.0035,7.96,-14.58,473.83


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-10-27_05-55-50
  done: false
  episode_len_mean: 474.13
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.030399999999948
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2372
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.9175964328977797
          entropy_coeff: 0.009999999999999998
          kl: 0.01222624266099144
          policy_loss: -0.06836012448701594
          total_loss: -0.058109817860855
          vf_explained_var: 0.6524533033370972
          vf_loss: 0.019624829002552562
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,622,31565.3,622000,-3.0304,7.96,-14.58,474.13


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-10-27_05-56-06
  done: false
  episode_len_mean: 477.2
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.0901999999999465
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2374
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.9615858448876275
          entropy_coeff: 0.009999999999999998
          kl: 0.00940743650888652
          policy_loss: 0.05866180393430922
          total_loss: 0.18334931515985065
          vf_explained_var: 0.4657707214355469
          vf_loss: 0.13676168616447185
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,623,31582,623000,-3.0902,7.96,-14.58,477.2


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-10-27_05-56-24
  done: false
  episode_len_mean: 475.23
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.0488999999999478
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2376
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 2.0163243797090318
          entropy_coeff: 0.009999999999999998
          kl: 0.0069908740685281755
          policy_loss: -0.09307911569873492
          total_loss: -0.09328518791331185
          vf_explained_var: 0.3302997946739197
          vf_loss: 0.014352779750091334
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,624,31600.1,624000,-3.0489,7.96,-14.58,475.23


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-10-27_05-56-39
  done: false
  episode_len_mean: 477.91
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.137799999999947
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2377
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.648399563630422
          entropy_coeff: 0.009999999999999998
          kl: 0.009829316540511475
          policy_loss: -0.08069522421186169
          total_loss: 0.067585687742879
          vf_explained_var: 0.26474639773368835
          vf_loss: 0.15688501231424096
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 625000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,625,31614.5,625000,-3.1378,7.96,-14.58,477.91


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-10-27_05-56-55
  done: false
  episode_len_mean: 479.53
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.1794999999999463
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2379
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.7550653047031826
          entropy_coeff: 0.009999999999999998
          kl: 0.009751209807405656
          policy_loss: 0.04793620448973444
          total_loss: 0.09329293372316493
          vf_explained_var: -0.3034621477127075
          vf_loss: 0.05509010844010239
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 62600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,626,31631,626000,-3.1795,7.96,-14.58,479.53


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-10-27_05-57-11
  done: false
  episode_len_mean: 481.9
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.248399999999945
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2381
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.834433032406701
          entropy_coeff: 0.009999999999999998
          kl: 0.005893816802093157
          policy_loss: 0.06463550710015827
          total_loss: 0.06113554421398375
          vf_explained_var: 0.1615619957447052
          vf_loss: 0.01011945298458967
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,627,31646.8,627000,-3.2484,7.96,-14.58,481.9


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-10-27_05-57-26
  done: false
  episode_len_mean: 488.21
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.352199999999944
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2382
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.801672251232869
          cur_lr: 5.000000000000001e-05
          entropy: 1.841899331410726
          entropy_coeff: 0.009999999999999998
          kl: 0.004224142466013506
          policy_loss: -0.03055383919013871
          total_loss: -0.03290601782500744
          vf_explained_var: -0.36020922660827637
          vf_loss: 0.012680438005675872
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 628

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,628,31661.2,628000,-3.3522,7.96,-14.58,488.21


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-10-27_05-57-42
  done: false
  episode_len_mean: 488.75
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.345599999999944
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2384
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 1.6235658685366312
          entropy_coeff: 0.009999999999999998
          kl: 0.009738913572514038
          policy_loss: -0.018977928658326468
          total_loss: -0.015406658086511824
          vf_explained_var: 0.7243502140045166
          vf_loss: 0.0159032218494556
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,629,31677.2,629000,-3.3456,7.96,-14.58,488.75


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-10-27_05-57-57
  done: false
  episode_len_mean: 489.67
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.346699999999944
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2385
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 1.7624103837543064
          entropy_coeff: 0.009999999999999998
          kl: 0.011402375961511094
          policy_loss: -0.032897322790490256
          total_loss: 0.09464740637275908
          vf_explained_var: 0.3021707832813263
          vf_loss: 0.1405983500337849
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 63000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,630,31692.5,630000,-3.3467,7.96,-14.58,489.67


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-10-27_05-58-12
  done: false
  episode_len_mean: 490.43
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.327799999999944
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2387
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 1.850740283065372
          entropy_coeff: 0.009999999999999998
          kl: 0.007706431372644238
          policy_loss: -0.0406924941473537
          total_loss: -0.024918856554561192
          vf_explained_var: -0.4765196144580841
          vf_loss: 0.031192024182140208
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,631,31707.2,631000,-3.3278,7.96,-14.58,490.43


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-10-27_05-58-30
  done: false
  episode_len_mean: 490.0
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.3067999999999445
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2388
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 1.845888113975525
          entropy_coeff: 0.009999999999999998
          kl: 0.00868531566485608
          policy_loss: -0.03607989094323582
          total_loss: -0.01718051474955347
          vf_explained_var: 0.5071998834609985
          vf_loss: 0.03387686785103546
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,632,31725.8,632000,-3.3068,7.96,-14.58,490


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-10-27_05-58-48
  done: false
  episode_len_mean: 499.33
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.498999999999943
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2390
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 1.919203027089437
          entropy_coeff: 0.009999999999999998
          kl: 0.012965437825834196
          policy_loss: 0.07315900673468907
          total_loss: 0.17480938111742336
          vf_explained_var: 0.6711323857307434
          vf_loss: 0.11564539079036977
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,633,31743.3,633000,-3.499,7.96,-14.58,499.33


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-10-27_05-59-04
  done: false
  episode_len_mean: 499.45
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.554899999999942
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2392
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 1.761874384350247
          entropy_coeff: 0.009999999999999998
          kl: 0.011917727590388378
          policy_loss: -0.025193016231060027
          total_loss: 0.002676659408542845
          vf_explained_var: 0.5676082968711853
          vf_loss: 0.04071136269097527
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 6340

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,634,31759.4,634000,-3.5549,7.96,-14.58,499.45


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-10-27_05-59-20
  done: false
  episode_len_mean: 500.54
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.5500999999999423
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2393
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 1.802342579099867
          entropy_coeff: 0.009999999999999998
          kl: 0.013519737929747953
          policy_loss: -0.138179195434269
          total_loss: -0.07821996610404716
          vf_explained_var: -0.2666940689086914
          vf_loss: 0.07256345537413532
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 63500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,635,31775.1,635000,-3.5501,7.96,-14.58,500.54


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-10-27_05-59-35
  done: false
  episode_len_mean: 506.16
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.7170999999999412
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2395
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 1.8577978928883871
          entropy_coeff: 0.009999999999999998
          kl: 0.008058851948512588
          policy_loss: -0.016396630224254395
          total_loss: 0.013141981760660807
          vf_explained_var: 0.11945796757936478
          vf_loss: 0.044886310269228284
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,636,31790.5,636000,-3.7171,7.96,-14.58,506.16


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-10-27_05-59-50
  done: false
  episode_len_mean: 506.57
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.735199999999941
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2396
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 2.02400672170851
          entropy_coeff: 0.009999999999999998
          kl: 0.006243477659949459
          policy_loss: 0.22842605171932115
          total_loss: 0.21731395655208163
          vf_explained_var: -0.2477981299161911
          vf_loss: 0.006625358602226091
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,637,31805.5,637000,-3.7352,7.96,-14.58,506.57


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-10-27_06-00-09
  done: false
  episode_len_mean: 506.45
  episode_media: {}
  episode_reward_max: 7.960000000000018
  episode_reward_mean: -3.711699999999942
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2398
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4008361256164345
          cur_lr: 5.000000000000001e-05
          entropy: 1.9279461397065056
          entropy_coeff: 0.009999999999999998
          kl: 0.02195924388107099
          policy_loss: -0.08429894361438023
          total_loss: 0.05510411156962315
          vf_explained_var: 0.526257336139679
          vf_loss: 0.1498804591389166
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,638,31824.7,638000,-3.7117,7.96,-14.58,506.45




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-10-27_06-00-42
  done: false
  episode_len_mean: 516.71
  episode_media: {}
  episode_reward_max: 7.180000000000012
  episode_reward_mean: -3.9556999999999403
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2400
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6012541884246518
          cur_lr: 5.000000000000001e-05
          entropy: 1.5287062320444318
          entropy_coeff: 0.009999999999999998
          kl: 0.008871253793962848
          policy_loss: -0.027770901937037705
          total_loss: -0.016975927187336814
          vf_explained_var: 0.36042556166648865
          vf_loss: 0.02074815857793308
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,639,31857.4,639000,-3.9557,7.18,-14.58,516.71


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-10-27_06-00-59
  done: false
  episode_len_mean: 520.39
  episode_media: {}
  episode_reward_max: 7.180000000000012
  episode_reward_mean: -4.033199999999939
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2401
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6012541884246518
          cur_lr: 5.000000000000001e-05
          entropy: 2.091659853193495
          entropy_coeff: 0.009999999999999998
          kl: 0.004745819656525847
          policy_loss: -0.02436101676689254
          total_loss: -0.036312742365731135
          vf_explained_var: 0.11263960599899292
          vf_loss: 0.0061114271598247194
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,640,31874.4,640000,-4.0332,7.18,-14.58,520.39


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-10-27_06-01-18
  done: false
  episode_len_mean: 523.13
  episode_media: {}
  episode_reward_max: 7.180000000000012
  episode_reward_mean: -4.091099999999939
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2403
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3006270942123259
          cur_lr: 5.000000000000001e-05
          entropy: 1.9584858708911472
          entropy_coeff: 0.009999999999999998
          kl: 0.020141178697331005
          policy_loss: -0.10705564415289295
          total_loss: 0.010429864273303086
          vf_explained_var: 0.5627822875976562
          vf_loss: 0.13101538223110967
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 6410

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,641,31893.3,641000,-4.0911,7.18,-14.58,523.13


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-10-27_06-01-36
  done: false
  episode_len_mean: 522.7
  episode_media: {}
  episode_reward_max: 7.180000000000012
  episode_reward_mean: -4.06199999999994
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2405
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45094064131848877
          cur_lr: 5.000000000000001e-05
          entropy: 2.026220821009742
          entropy_coeff: 0.009999999999999998
          kl: 0.015366508466281579
          policy_loss: 0.043780621969037585
          total_loss: 0.12256619969589842
          vf_explained_var: 0.11306548863649368
          vf_loss: 0.09211839984378054
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 642000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,642,31911.1,642000,-4.062,7.18,-14.58,522.7


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-10-27_06-01-53
  done: false
  episode_len_mean: 526.16
  episode_media: {}
  episode_reward_max: 7.180000000000012
  episode_reward_mean: -4.060399999999939
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2407
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45094064131848877
          cur_lr: 5.000000000000001e-05
          entropy: 2.0219615472687615
          entropy_coeff: 0.009999999999999998
          kl: 0.01588096244299849
          policy_loss: -0.04422377559045951
          total_loss: 0.035181465331051084
          vf_explained_var: 0.35851165652275085
          vf_loss: 0.09246348714223132
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 643

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,643,31928.5,643000,-4.0604,7.18,-14.58,526.16


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-10-27_06-02-12
  done: false
  episode_len_mean: 527.81
  episode_media: {}
  episode_reward_max: 7.180000000000012
  episode_reward_mean: -4.10699999999994
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2409
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45094064131848877
          cur_lr: 5.000000000000001e-05
          entropy: 1.9994304643736944
          entropy_coeff: 0.009999999999999998
          kl: 0.011950085058156507
          policy_loss: 0.028805669107370906
          total_loss: 0.23839770733482307
          vf_explained_var: 0.37431368231773376
          vf_loss: 0.2241975583963924
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 64400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,644,31947.3,644000,-4.107,7.18,-14.58,527.81


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-10-27_06-02-30
  done: false
  episode_len_mean: 527.64
  episode_media: {}
  episode_reward_max: 7.180000000000012
  episode_reward_mean: -4.08469999999994
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2410
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45094064131848877
          cur_lr: 5.000000000000001e-05
          entropy: 2.0427525480588278
          entropy_coeff: 0.009999999999999998
          kl: 0.013006996588661125
          policy_loss: -0.05098726683192783
          total_loss: 0.009659947413537238
          vf_explained_var: 0.6005343794822693
          vf_loss: 0.0752093574239148
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 64500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,645,31965.1,645000,-4.0847,7.18,-14.58,527.64




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-10-27_06-03-03
  done: false
  episode_len_mean: 521.68
  episode_media: {}
  episode_reward_max: 7.180000000000012
  episode_reward_mean: -4.03749999999994
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 3
  episodes_total: 2413
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45094064131848877
          cur_lr: 5.000000000000001e-05
          entropy: 2.092796222368876
          entropy_coeff: 0.009999999999999998
          kl: 0.042119039270454635
          policy_loss: -0.07245223960942693
          total_loss: 0.14695337290565172
          vf_explained_var: 0.6222816109657288
          vf_loss: 0.22134039004643757
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 646000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,646,31997.9,646000,-4.0375,7.18,-14.58,521.68


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-10-27_06-03-21
  done: false
  episode_len_mean: 532.18
  episode_media: {}
  episode_reward_max: 5.83
  episode_reward_mean: -4.242499999999939
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2415
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6764109619777329
          cur_lr: 5.000000000000001e-05
          entropy: 2.0097891012827556
          entropy_coeff: 0.009999999999999998
          kl: 0.013719122074117528
          policy_loss: 0.02385219410061836
          total_loss: 0.12132357226477729
          vf_explained_var: 0.41100645065307617
          vf_loss: 0.10828950433577929
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 647000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,647,32015.8,647000,-4.2425,5.83,-14.58,532.18


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-10-27_06-03-38
  done: false
  episode_len_mean: 531.33
  episode_media: {}
  episode_reward_max: 5.83
  episode_reward_mean: -4.21319999999994
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2416
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6764109619777329
          cur_lr: 5.000000000000001e-05
          entropy: 1.8551023973359002
          entropy_coeff: 0.009999999999999998
          kl: 0.010245116355382401
          policy_loss: 0.012269205310278468
          total_loss: 0.1771661346571313
          vf_explained_var: 0.12891671061515808
          vf_loss: 0.17651804303362345
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 648000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,648,32033.1,648000,-4.2132,5.83,-14.58,531.33


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 649000
  custom_metrics: {}
  date: 2021-10-27_06-03-55
  done: false
  episode_len_mean: 537.12
  episode_media: {}
  episode_reward_max: 5.83
  episode_reward_mean: -4.27339999999994
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2418
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6764109619777329
          cur_lr: 5.000000000000001e-05
          entropy: 2.131728182898627
          entropy_coeff: 0.009999999999999998
          kl: 0.009775842229114358
          policy_loss: -0.05799460249642531
          total_loss: -0.008449926558468077
          vf_explained_var: 0.1951400488615036
          vf_loss: 0.06424946863618163
    num_agent_steps_sampled: 649000
    num_agent_steps_trained: 649000
    num_steps_sampled: 649000
    num_steps_trained: 649000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,649,32050.1,649000,-4.2734,5.83,-14.58,537.12


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 650000
  custom_metrics: {}
  date: 2021-10-27_06-04-14
  done: false
  episode_len_mean: 540.83
  episode_media: {}
  episode_reward_max: 5.83
  episode_reward_mean: -4.34889999999994
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2420
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6764109619777329
          cur_lr: 5.000000000000001e-05
          entropy: 2.175370881292555
          entropy_coeff: 0.009999999999999998
          kl: 0.026399499982105685
          policy_loss: 0.06340514909889963
          total_loss: 0.40801332394282025
          vf_explained_var: 0.4129008948802948
          vf_loss: 0.34850497398939395
    num_agent_steps_sampled: 650000
    num_agent_steps_trained: 650000
    num_steps_sampled: 650000
    num_steps_trained: 650000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,650,32069,650000,-4.3489,5.83,-14.58,540.83


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 651000
  custom_metrics: {}
  date: 2021-10-27_06-04-32
  done: false
  episode_len_mean: 545.36
  episode_media: {}
  episode_reward_max: 5.83
  episode_reward_mean: -4.45309999999994
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2422
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0146164429665994
          cur_lr: 5.000000000000001e-05
          entropy: 2.1188643905851574
          entropy_coeff: 0.009999999999999998
          kl: 0.005514662125268297
          policy_loss: -0.0203448173072603
          total_loss: 0.07597186350160175
          vf_explained_var: 0.039980027824640274
          vf_loss: 0.111910055950284
    num_agent_steps_sampled: 651000
    num_agent_steps_trained: 651000
    num_steps_sampled: 651000
    num_steps_trained: 651000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,651,32087,651000,-4.4531,5.83,-14.58,545.36


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 652000
  custom_metrics: {}
  date: 2021-10-27_06-04-49
  done: false
  episode_len_mean: 551.28
  episode_media: {}
  episode_reward_max: 5.83
  episode_reward_mean: -4.55099999999994
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2423
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0146164429665994
          cur_lr: 5.000000000000001e-05
          entropy: 1.9968666579988268
          entropy_coeff: 0.009999999999999998
          kl: 0.0057444415117192255
          policy_loss: -0.058408829466336305
          total_loss: 0.046220914440022576
          vf_explained_var: 0.06832709908485413
          vf_loss: 0.11877000369907667
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_steps_sampled: 652000
    num_steps_trained: 652000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,652,32103.6,652000,-4.551,5.83,-14.58,551.28


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 653000
  custom_metrics: {}
  date: 2021-10-27_06-05-05
  done: false
  episode_len_mean: 553.32
  episode_media: {}
  episode_reward_max: 5.83
  episode_reward_mean: -4.622999999999939
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2425
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0146164429665994
          cur_lr: 5.000000000000001e-05
          entropy: 2.20089737839169
          entropy_coeff: 0.009999999999999998
          kl: 0.012599883417642212
          policy_loss: -0.06941798836406735
          total_loss: 0.24724427215341066
          vf_explained_var: 0.32853376865386963
          vf_loss: 0.32588718425896435
    num_agent_steps_sampled: 653000
    num_agent_steps_trained: 653000
    num_steps_sampled: 653000
    num_steps_trained: 653000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,653,32120.1,653000,-4.623,5.83,-14.58,553.32




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 654000
  custom_metrics: {}
  date: 2021-10-27_06-05-40
  done: false
  episode_len_mean: 551.78
  episode_media: {}
  episode_reward_max: 5.83
  episode_reward_mean: -4.628799999999938
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 2
  episodes_total: 2427
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0146164429665994
          cur_lr: 5.000000000000001e-05
          entropy: 2.033888014157613
          entropy_coeff: 0.009999999999999998
          kl: 0.006655988595908744
          policy_loss: -0.12121135377221637
          total_loss: -0.01529453806579113
          vf_explained_var: 0.19283485412597656
          vf_loss: 0.11950241959032913
    num_agent_steps_sampled: 654000
    num_agent_steps_trained: 654000
    num_steps_sampled: 654000
    num_steps_trained: 654000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,654,32155.2,654000,-4.6288,5.83,-14.58,551.78




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 655000
  custom_metrics: {}
  date: 2021-10-27_06-06-17
  done: false
  episode_len_mean: 548.36
  episode_media: {}
  episode_reward_max: 5.83
  episode_reward_mean: -4.466699999999939
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 3
  episodes_total: 2430
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0146164429665994
          cur_lr: 5.000000000000001e-05
          entropy: 2.1949244764116074
          entropy_coeff: 0.009999999999999998
          kl: 0.01488196235866541
          policy_loss: -0.1118783368004693
          total_loss: 0.19283909855617418
          vf_explained_var: 0.7012792229652405
          vf_loss: 0.311567193393906
    num_agent_steps_sampled: 655000
    num_agent_steps_trained: 655000
    num_steps_sampled: 655000
    num_steps_trained: 655000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,655,32192.3,655000,-4.4667,5.83,-14.58,548.36


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 656000
  custom_metrics: {}
  date: 2021-10-27_06-06-33
  done: false
  episode_len_mean: 554.18
  episode_media: {}
  episode_reward_max: 5.640000000000002
  episode_reward_mean: -4.595099999999938
  episode_reward_min: -14.579999999999886
  episodes_this_iter: 1
  episodes_total: 2431
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0146164429665994
          cur_lr: 5.000000000000001e-05
          entropy: 1.5531656722227731
          entropy_coeff: 0.009999999999999998
          kl: 0.024315118564587835
          policy_loss: 0.0544961160255803
          total_loss: 0.3323194364292754
          vf_explained_var: -0.042083267122507095
          vf_loss: 0.26868445813371283
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_steps_sampled: 656000
    num_steps_trained: 65600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,656,32208,656000,-4.5951,5.64,-14.58,554.18


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 657000
  custom_metrics: {}
  date: 2021-10-27_06-06-50
  done: false
  episode_len_mean: 553.92
  episode_media: {}
  episode_reward_max: 5.640000000000002
  episode_reward_mean: -4.453599999999939
  episode_reward_min: -13.919999999999884
  episodes_this_iter: 2
  episodes_total: 2433
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5219246644498996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1312141948276095
          entropy_coeff: 0.009999999999999998
          kl: 0.017488858706813718
          policy_loss: -0.007111678189701504
          total_loss: 0.37465976116557914
          vf_explained_var: 0.44699957966804504
          vf_loss: 0.3764668574142787
    num_agent_steps_sampled: 657000
    num_agent_steps_trained: 657000
    num_steps_sampled: 657000
    num_steps_trained: 6570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,657,32224.7,657000,-4.4536,5.64,-13.92,553.92


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 658000
  custom_metrics: {}
  date: 2021-10-27_06-07-06
  done: false
  episode_len_mean: 557.16
  episode_media: {}
  episode_reward_max: 5.640000000000002
  episode_reward_mean: -4.4969999999999395
  episode_reward_min: -13.919999999999884
  episodes_this_iter: 2
  episodes_total: 2435
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5219246644498996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1507643779118855
          entropy_coeff: 0.009999999999999998
          kl: 0.004518668293756534
          policy_loss: -0.09714188393619326
          total_loss: 0.0637438020358483
          vf_explained_var: 0.2337433099746704
          vf_loss: 0.17551625588093883
    num_agent_steps_sampled: 658000
    num_agent_steps_trained: 658000
    num_steps_sampled: 658000
    num_steps_trained: 65800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,658,32240.6,658000,-4.497,5.64,-13.92,557.16




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 659000
  custom_metrics: {}
  date: 2021-10-27_06-07-40
  done: false
  episode_len_mean: 553.52
  episode_media: {}
  episode_reward_max: 5.640000000000002
  episode_reward_mean: -4.235299999999941
  episode_reward_min: -11.129999999999926
  episodes_this_iter: 2
  episodes_total: 2437
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7609623322249498
          cur_lr: 5.000000000000001e-05
          entropy: 2.0398980299631755
          entropy_coeff: 0.009999999999999998
          kl: 0.008720785534709681
          policy_loss: -0.17780267426537144
          total_loss: 0.004067097718103064
          vf_explained_var: 0.04935775697231293
          vf_loss: 0.19563256092886958
    num_agent_steps_sampled: 659000
    num_agent_steps_trained: 659000
    num_steps_sampled: 659000
    num_steps_trained: 659

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,659,32274.6,659000,-4.2353,5.64,-11.13,553.52


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 660000
  custom_metrics: {}
  date: 2021-10-27_06-07-56
  done: false
  episode_len_mean: 553.48
  episode_media: {}
  episode_reward_max: 5.640000000000002
  episode_reward_mean: -4.18719999999994
  episode_reward_min: -10.839999999999895
  episodes_this_iter: 1
  episodes_total: 2438
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7609623322249498
          cur_lr: 5.000000000000001e-05
          entropy: 1.5968202047877842
          entropy_coeff: 0.009999999999999998
          kl: 0.0038360238878844656
          policy_loss: 0.042598000334368814
          total_loss: 0.2533124081790447
          vf_explained_var: 0.3211202621459961
          vf_loss: 0.22376354306729304
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_steps_sampled: 660000
    num_steps_trained: 660000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,660,32290.5,660000,-4.1872,5.64,-10.84,553.48




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 661000
  custom_metrics: {}
  date: 2021-10-27_06-08-47
  done: false
  episode_len_mean: 544.63
  episode_media: {}
  episode_reward_max: 5.640000000000002
  episode_reward_mean: -4.048399999999942
  episode_reward_min: -10.839999999999895
  episodes_this_iter: 4
  episodes_total: 2442
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3804811661124749
          cur_lr: 5.000000000000001e-05
          entropy: 1.8646546403566997
          entropy_coeff: 0.009999999999999998
          kl: 0.01196898998397372
          policy_loss: 0.14529570697082414
          total_loss: 0.32453779909345837
          vf_explained_var: 0.5612603425979614
          vf_loss: 0.19333466415603956
    num_agent_steps_sampled: 661000
    num_agent_steps_trained: 661000
    num_steps_sampled: 661000
    num_steps_trained: 661000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,661,32341.8,661000,-4.0484,5.64,-10.84,544.63


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 662000
  custom_metrics: {}
  date: 2021-10-27_06-09-05
  done: false
  episode_len_mean: 549.91
  episode_media: {}
  episode_reward_max: 5.640000000000002
  episode_reward_mean: -4.171199999999941
  episode_reward_min: -10.839999999999895
  episodes_this_iter: 1
  episodes_total: 2443
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3804811661124749
          cur_lr: 5.000000000000001e-05
          entropy: 2.2340592516793145
          entropy_coeff: 0.009999999999999998
          kl: 0.01268817390906755
          policy_loss: 0.042239536510573494
          total_loss: 0.08775768793291516
          vf_explained_var: 0.19583600759506226
          vf_loss: 0.06303113052288406
    num_agent_steps_sampled: 662000
    num_agent_steps_trained: 662000
    num_steps_sampled: 662000
    num_steps_trained: 66200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,662,32359.6,662000,-4.1712,5.64,-10.84,549.91


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 663000
  custom_metrics: {}
  date: 2021-10-27_06-09-21
  done: false
  episode_len_mean: 548.83
  episode_media: {}
  episode_reward_max: 5.640000000000002
  episode_reward_mean: -4.1912999999999405
  episode_reward_min: -10.839999999999895
  episodes_this_iter: 2
  episodes_total: 2445
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3804811661124749
          cur_lr: 5.000000000000001e-05
          entropy: 2.131210764249166
          entropy_coeff: 0.009999999999999998
          kl: 0.007872316228565148
          policy_loss: -0.03544088155031204
          total_loss: 0.005657276511192322
          vf_explained_var: 0.560409665107727
          vf_loss: 0.059414996240391496
    num_agent_steps_sampled: 663000
    num_agent_steps_trained: 663000
    num_steps_sampled: 663000
    num_steps_trained: 6630

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,663,32376.1,663000,-4.1913,5.64,-10.84,548.83


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 664000
  custom_metrics: {}
  date: 2021-10-27_06-09-37
  done: false
  episode_len_mean: 548.43
  episode_media: {}
  episode_reward_max: 5.640000000000002
  episode_reward_mean: -4.23769999999994
  episode_reward_min: -10.839999999999895
  episodes_this_iter: 1
  episodes_total: 2446
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3804811661124749
          cur_lr: 5.000000000000001e-05
          entropy: 2.0825994968414308
          entropy_coeff: 0.009999999999999998
          kl: 0.008536493618354355
          policy_loss: -0.05252789143058989
          total_loss: 0.07936397675010894
          vf_explained_var: -0.0498998798429966
          vf_loss: 0.1494698897521529
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_steps_sampled: 664000
    num_steps_trained: 664000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,664,32391.6,664000,-4.2377,5.64,-10.84,548.43




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 665000
  custom_metrics: {}
  date: 2021-10-27_06-10-30
  done: false
  episode_len_mean: 544.28
  episode_media: {}
  episode_reward_max: 5.77
  episode_reward_mean: -4.089999999999941
  episode_reward_min: -10.319999999999933
  episodes_this_iter: 4
  episodes_total: 2450
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3804811661124749
          cur_lr: 5.000000000000001e-05
          entropy: 2.1392828596962823
          entropy_coeff: 0.009999999999999998
          kl: 0.012324455836418401
          policy_loss: -0.14409582227882411
          total_loss: -0.04652640979944004
          vf_explained_var: 0.0971694067120552
          vf_loss: 0.11427301986453434
    num_agent_steps_sampled: 665000
    num_agent_steps_trained: 665000
    num_steps_sampled: 665000
    num_steps_trained: 665000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,665,32445.2,665000,-4.09,5.77,-10.32,544.28


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 666000
  custom_metrics: {}
  date: 2021-10-27_06-10-48
  done: false
  episode_len_mean: 542.91
  episode_media: {}
  episode_reward_max: 5.77
  episode_reward_mean: -4.112499999999941
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 2
  episodes_total: 2452
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3804811661124749
          cur_lr: 5.000000000000001e-05
          entropy: 2.050948945681254
          entropy_coeff: 0.009999999999999998
          kl: 0.013132507420850351
          policy_loss: -0.014246786592735185
          total_loss: 0.1804762090659804
          vf_explained_var: 0.002632005373016
          vf_loss: 0.21023581496103563
    num_agent_steps_sampled: 666000
    num_agent_steps_trained: 666000
    num_steps_sampled: 666000
    num_steps_trained: 666000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,666,32463,666000,-4.1125,5.77,-11.9,542.91


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 667000
  custom_metrics: {}
  date: 2021-10-27_06-11-05
  done: false
  episode_len_mean: 541.4
  episode_media: {}
  episode_reward_max: 5.77
  episode_reward_mean: -4.1573999999999405
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 1
  episodes_total: 2453
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3804811661124749
          cur_lr: 5.000000000000001e-05
          entropy: 1.8542185929086474
          entropy_coeff: 0.009999999999999998
          kl: 0.023811459012243377
          policy_loss: -0.005411848487953345
          total_loss: 0.3466683485441738
          vf_explained_var: 0.4383002519607544
          vf_loss: 0.3615625732888778
    num_agent_steps_sampled: 667000
    num_agent_steps_trained: 667000
    num_steps_sampled: 667000
    num_steps_trained: 667000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,667,32480.1,667000,-4.1574,5.77,-11.9,541.4




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 668000
  custom_metrics: {}
  date: 2021-10-27_06-11-41
  done: false
  episode_len_mean: 542.23
  episode_media: {}
  episode_reward_max: 5.77
  episode_reward_mean: -4.16249999999994
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 3
  episodes_total: 2456
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5707217491687121
          cur_lr: 5.000000000000001e-05
          entropy: 1.9791273514429728
          entropy_coeff: 0.009999999999999998
          kl: 0.006561141444149696
          policy_loss: -0.11340210818582111
          total_loss: 0.08719342740045653
          vf_explained_var: 0.13761062920093536
          vf_loss: 0.21664222586227375
    num_agent_steps_sampled: 668000
    num_agent_steps_trained: 668000
    num_steps_sampled: 668000
    num_steps_trained: 668000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,668,32515.6,668000,-4.1625,5.77,-11.9,542.23


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 669000
  custom_metrics: {}
  date: 2021-10-27_06-11-57
  done: false
  episode_len_mean: 543.27
  episode_media: {}
  episode_reward_max: 5.77
  episode_reward_mean: -4.18029999999994
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 1
  episodes_total: 2457
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5707217491687121
          cur_lr: 5.000000000000001e-05
          entropy: 2.0502599504258896
          entropy_coeff: 0.009999999999999998
          kl: 0.01345231116664463
          policy_loss: -0.01773590391708745
          total_loss: 0.17098589258061514
          vf_explained_var: 0.5851683020591736
          vf_loss: 0.2015468688474761
    num_agent_steps_sampled: 669000
    num_agent_steps_trained: 669000
    num_steps_sampled: 669000
    num_steps_trained: 669000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,669,32531.6,669000,-4.1803,5.77,-11.9,543.27


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 670000
  custom_metrics: {}
  date: 2021-10-27_06-12-14
  done: false
  episode_len_mean: 549.82
  episode_media: {}
  episode_reward_max: 5.77
  episode_reward_mean: -4.319799999999939
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 2
  episodes_total: 2459
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5707217491687121
          cur_lr: 5.000000000000001e-05
          entropy: 1.841721060540941
          entropy_coeff: 0.009999999999999998
          kl: 0.016740014998809918
          policy_loss: -0.0001705709844827652
          total_loss: 0.39079069155785773
          vf_explained_var: 0.7041045427322388
          vf_loss: 0.3998245779838827
    num_agent_steps_sampled: 670000
    num_agent_steps_trained: 670000
    num_steps_sampled: 670000
    num_steps_trained: 670000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,670,32549.1,670000,-4.3198,5.77,-11.9,549.82




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 671000
  custom_metrics: {}
  date: 2021-10-27_06-13-09
  done: false
  episode_len_mean: 546.21
  episode_media: {}
  episode_reward_max: 5.77
  episode_reward_mean: -4.298799999999938
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 4
  episodes_total: 2463
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5707217491687121
          cur_lr: 5.000000000000001e-05
          entropy: 1.9637747685114542
          entropy_coeff: 0.009999999999999998
          kl: 0.019724511993745825
          policy_loss: 0.014592155896955067
          total_loss: 0.2667197614494297
          vf_explained_var: 0.5886826515197754
          vf_loss: 0.2605081419356995
    num_agent_steps_sampled: 671000
    num_agent_steps_trained: 671000
    num_steps_sampled: 671000
    num_steps_trained: 671000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,671,32603.4,671000,-4.2988,5.77,-11.9,546.21




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 672000
  custom_metrics: {}
  date: 2021-10-27_06-13-42
  done: false
  episode_len_mean: 546.37
  episode_media: {}
  episode_reward_max: 5.77
  episode_reward_mean: -4.340599999999938
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 2
  episodes_total: 2465
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5707217491687121
          cur_lr: 5.000000000000001e-05
          entropy: 1.6010372890366449
          entropy_coeff: 0.009999999999999998
          kl: 0.004728905553435137
          policy_loss: -0.06257166531350877
          total_loss: 0.022662999563746983
          vf_explained_var: 0.1773223876953125
          vf_loss: 0.09854614842010455
    num_agent_steps_sampled: 672000
    num_agent_steps_trained: 672000
    num_steps_sampled: 672000
    num_steps_trained: 672000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,672,32636.5,672000,-4.3406,5.77,-11.9,546.37




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 673000
  custom_metrics: {}
  date: 2021-10-27_06-14-19
  done: false
  episode_len_mean: 539.51
  episode_media: {}
  episode_reward_max: 5.82
  episode_reward_mean: -4.212299999999939
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 2
  episodes_total: 2467
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28536087458435605
          cur_lr: 5.000000000000001e-05
          entropy: 1.855884755982293
          entropy_coeff: 0.009999999999999998
          kl: 0.026974936943672607
          policy_loss: -0.07329390719532966
          total_loss: 0.33934497783581413
          vf_explained_var: 0.43131208419799805
          vf_loss: 0.42350014914991335
    num_agent_steps_sampled: 673000
    num_agent_steps_trained: 673000
    num_steps_sampled: 673000
    num_steps_trained: 673000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,673,32673.5,673000,-4.2123,5.82,-11.9,539.51


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 674000
  custom_metrics: {}
  date: 2021-10-27_06-14-39
  done: false
  episode_len_mean: 539.23
  episode_media: {}
  episode_reward_max: 5.82
  episode_reward_mean: -4.205199999999937
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 2
  episodes_total: 2469
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42804131187653416
          cur_lr: 5.000000000000001e-05
          entropy: 1.6271371748712329
          entropy_coeff: 0.009999999999999998
          kl: 0.018796115818241314
          policy_loss: -0.06939898932145702
          total_loss: 0.2857761395474275
          vf_explained_var: 0.4307310879230499
          vf_loss: 0.3634009846382671
    num_agent_steps_sampled: 674000
    num_agent_steps_trained: 674000
    num_steps_sampled: 674000
    num_steps_trained: 674000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,674,32693.5,674000,-4.2052,5.82,-11.9,539.23




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 675000
  custom_metrics: {}
  date: 2021-10-27_06-15-34
  done: false
  episode_len_mean: 528.86
  episode_media: {}
  episode_reward_max: 5.82
  episode_reward_mean: -4.0556999999999395
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 4
  episodes_total: 2473
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42804131187653416
          cur_lr: 5.000000000000001e-05
          entropy: 1.8252229041523405
          entropy_coeff: 0.009999999999999998
          kl: 0.015978128837623375
          policy_loss: 0.04558517804576291
          total_loss: 0.3387984982795186
          vf_explained_var: 0.23309342563152313
          vf_loss: 0.3046262539198829
    num_agent_steps_sampled: 675000
    num_agent_steps_trained: 675000
    num_steps_sampled: 675000
    num_steps_trained: 675000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,675,32748.9,675000,-4.0557,5.82,-11.9,528.86


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 676000
  custom_metrics: {}
  date: 2021-10-27_06-15-57
  done: false
  episode_len_mean: 527.35
  episode_media: {}
  episode_reward_max: 5.82
  episode_reward_mean: -4.01349999999994
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 2
  episodes_total: 2475
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42804131187653416
          cur_lr: 5.000000000000001e-05
          entropy: 1.8912239882681106
          entropy_coeff: 0.009999999999999998
          kl: 0.01892120488632544
          policy_loss: -0.12506351479225689
          total_loss: 0.22379829953942035
          vf_explained_var: 0.44513189792633057
          vf_loss: 0.35967499679989284
    num_agent_steps_sampled: 676000
    num_agent_steps_trained: 676000
    num_steps_sampled: 676000
    num_steps_trained: 676000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,676,32771.6,676000,-4.0135,5.82,-11.9,527.35




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 677000
  custom_metrics: {}
  date: 2021-10-27_06-17-33
  done: false
  episode_len_mean: 501.87
  episode_media: {}
  episode_reward_max: 5.82
  episode_reward_mean: -3.546499999999944
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 5
  episodes_total: 2480
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42804131187653416
          cur_lr: 5.000000000000001e-05
          entropy: 1.657444088988834
          entropy_coeff: 0.009999999999999998
          kl: 0.016407125862007682
          policy_loss: -0.07703725563155281
          total_loss: 0.416352052324348
          vf_explained_var: 0.5254847407341003
          vf_loss: 0.5029408186674118
    num_agent_steps_sampled: 677000
    num_agent_steps_trained: 677000
    num_steps_sampled: 677000
    num_steps_trained: 677000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,677,32867.4,677000,-3.5465,5.82,-11.9,501.87




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 678000
  custom_metrics: {}
  date: 2021-10-27_06-19-45
  done: false
  episode_len_mean: 463.64
  episode_media: {}
  episode_reward_max: 5.82
  episode_reward_mean: -2.9302999999999484
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 8
  episodes_total: 2488
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42804131187653416
          cur_lr: 5.000000000000001e-05
          entropy: 1.8328896522521974
          entropy_coeff: 0.009999999999999998
          kl: 0.016786612231768198
          policy_loss: -0.09606869883007474
          total_loss: 0.11147632383637958
          vf_explained_var: 0.8659132122993469
          vf_loss: 0.21868855402701431
    num_agent_steps_sampled: 678000
    num_agent_steps_trained: 678000
    num_steps_sampled: 678000
    num_steps_trained: 678000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,678,32999.1,678000,-2.9303,5.82,-11.9,463.64




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 679000
  custom_metrics: {}
  date: 2021-10-27_06-20-27
  done: false
  episode_len_mean: 456.05
  episode_media: {}
  episode_reward_max: 5.82
  episode_reward_mean: -2.817999999999949
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 3
  episodes_total: 2491
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42804131187653416
          cur_lr: 5.000000000000001e-05
          entropy: 1.8513098412089877
          entropy_coeff: 0.009999999999999998
          kl: 0.012477636550987686
          policy_loss: -0.13009324400789207
          total_loss: 0.08876214838690227
          vf_explained_var: 0.6607725620269775
          vf_loss: 0.2320275461508168
    num_agent_steps_sampled: 679000
    num_agent_steps_trained: 679000
    num_steps_sampled: 679000
    num_steps_trained: 679000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,679,33041.2,679000,-2.818,5.82,-11.9,456.05




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 680000
  custom_metrics: {}
  date: 2021-10-27_06-21-42
  done: false
  episode_len_mean: 436.81
  episode_media: {}
  episode_reward_max: 5.82
  episode_reward_mean: -2.473499999999952
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 4
  episodes_total: 2495
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42804131187653416
          cur_lr: 5.000000000000001e-05
          entropy: 1.5311409685346815
          entropy_coeff: 0.009999999999999998
          kl: 0.02248896829142003
          policy_loss: -0.11474613580438826
          total_loss: 0.3516004597561227
          vf_explained_var: 0.6262279748916626
          vf_loss: 0.47203180078003143
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_steps_sampled: 680000
    num_steps_trained: 680000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,680,33116.3,680000,-2.4735,5.82,-11.9,436.81




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 681000
  custom_metrics: {}
  date: 2021-10-27_06-23-34
  done: false
  episode_len_mean: 409.58
  episode_media: {}
  episode_reward_max: 7.590000000000005
  episode_reward_mean: -2.0128999999999557
  episode_reward_min: -11.899999999999938
  episodes_this_iter: 6
  episodes_total: 2501
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6420619678148012
          cur_lr: 5.000000000000001e-05
          entropy: 1.648984338177575
          entropy_coeff: 0.009999999999999998
          kl: 0.025692832660303425
          policy_loss: -0.04824564109245936
          total_loss: 0.4528419506218698
          vf_explained_var: 0.7602860331535339
          vf_loss: 0.5010810466276274
    num_agent_steps_sampled: 681000
    num_agent_steps_trained: 681000
    num_steps_sampled: 681000
    num_steps_trained: 681000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,681,33228.1,681000,-2.0129,7.59,-11.9,409.58




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 682000
  custom_metrics: {}
  date: 2021-10-27_06-24-11
  done: false
  episode_len_mean: 406.88
  episode_media: {}
  episode_reward_max: 7.590000000000005
  episode_reward_mean: -2.1052999999999544
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 3
  episodes_total: 2504
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.867239126894209
          entropy_coeff: 0.009999999999999998
          kl: 0.011882039532151327
          policy_loss: 0.03328957897093561
          total_loss: 0.17133173843224844
          vf_explained_var: -0.07546170800924301
          vf_loss: 0.1452710456525286
    num_agent_steps_sampled: 682000
    num_agent_steps_trained: 682000
    num_steps_sampled: 682000
    num_steps_trained: 68200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,682,33265.6,682000,-2.1053,7.59,-16.55,406.88




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 683000
  custom_metrics: {}
  date: 2021-10-27_06-25-49
  done: false
  episode_len_mean: 383.32
  episode_media: {}
  episode_reward_max: 9.590000000000005
  episode_reward_mean: -1.6255999999999573
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 6
  episodes_total: 2510
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.7960275755988226
          entropy_coeff: 0.009999999999999998
          kl: 0.017607526581673177
          policy_loss: -0.030667294210029974
          total_loss: 0.46598309717244574
          vf_explained_var: 0.717583954334259
          vf_loss: 0.49765299302008414
    num_agent_steps_sampled: 683000
    num_agent_steps_trained: 683000
    num_steps_sampled: 683000
    num_steps_trained: 6830

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,683,33363.2,683000,-1.6256,9.59,-16.55,383.32




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 684000
  custom_metrics: {}
  date: 2021-10-27_06-28-02
  done: false
  episode_len_mean: 352.6
  episode_media: {}
  episode_reward_max: 9.590000000000005
  episode_reward_mean: -1.1272999999999604
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 8
  episodes_total: 2518
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.8953650991121929
          entropy_coeff: 0.009999999999999998
          kl: 0.01191405540861052
          policy_loss: 0.09881714578304025
          total_loss: 0.25914775265587703
          vf_explained_var: 0.15185044705867767
          vf_loss: 0.16780991343419172
    num_agent_steps_sampled: 684000
    num_agent_steps_trained: 684000
    num_steps_sampled: 684000
    num_steps_trained: 684000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,684,33496.4,684000,-1.1273,9.59,-16.55,352.6


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 685000
  custom_metrics: {}
  date: 2021-10-27_06-28-22
  done: false
  episode_len_mean: 351.86
  episode_media: {}
  episode_reward_max: 9.590000000000005
  episode_reward_mean: -1.1222999999999606
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 1
  episodes_total: 2519
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.854928187529246
          entropy_coeff: 0.009999999999999998
          kl: 0.010034494792532753
          policy_loss: 0.04532608770661884
          total_loss: 0.12251388629277547
          vf_explained_var: 0.16824446618556976
          vf_loss: 0.08607292585462953
    num_agent_steps_sampled: 685000
    num_agent_steps_trained: 685000
    num_steps_sampled: 685000
    num_steps_trained: 68500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,685,33516.3,685000,-1.1223,9.59,-16.55,351.86




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 686000
  custom_metrics: {}
  date: 2021-10-27_06-29-01
  done: false
  episode_len_mean: 346.18
  episode_media: {}
  episode_reward_max: 9.590000000000005
  episode_reward_mean: -1.0468999999999606
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 3
  episodes_total: 2522
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.8650727536943223
          entropy_coeff: 0.009999999999999998
          kl: 0.012152845934601824
          policy_loss: -0.037176669016480444
          total_loss: 0.2515990774664614
          vf_explained_var: 0.6511785984039307
          vf_loss: 0.2957221551901764
    num_agent_steps_sampled: 686000
    num_agent_steps_trained: 686000
    num_steps_sampled: 686000
    num_steps_trained: 68600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,686,33555.7,686000,-1.0469,9.59,-16.55,346.18


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 687000
  custom_metrics: {}
  date: 2021-10-27_06-29-20
  done: false
  episode_len_mean: 343.37
  episode_media: {}
  episode_reward_max: 9.590000000000005
  episode_reward_mean: -1.0141999999999616
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 2
  episodes_total: 2524
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.659340308772193
          entropy_coeff: 0.009999999999999998
          kl: 0.011798760311695577
          policy_loss: -0.037814767782886824
          total_loss: 0.27201157949037025
          vf_explained_var: 0.6695032119750977
          vf_loss: 0.3150564462567369
    num_agent_steps_sampled: 687000
    num_agent_steps_trained: 687000
    num_steps_sampled: 687000
    num_steps_trained: 68700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,687,33574.8,687000,-1.0142,9.59,-16.55,343.37




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 688000
  custom_metrics: {}
  date: 2021-10-27_06-29-57
  done: false
  episode_len_mean: 339.38
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.8151999999999633
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 3
  episodes_total: 2527
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.7897350192070007
          entropy_coeff: 0.009999999999999998
          kl: 0.014356057182315082
          policy_loss: -0.05559210106730461
          total_loss: 0.7371922777758704
          vf_explained_var: 0.5800309777259827
          vf_loss: 0.7968555132548014
    num_agent_steps_sampled: 688000
    num_agent_steps_trained: 688000
    num_steps_sampled: 688000
    num_steps_trained: 688000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,688,33611.2,688000,-0.8152,9.8,-16.55,339.38


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 689000
  custom_metrics: {}
  date: 2021-10-27_06-30-14
  done: false
  episode_len_mean: 346.37
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.9840999999999621
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 2
  episodes_total: 2529
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.928501472208235
          entropy_coeff: 0.009999999999999998
          kl: 0.008141472698315865
          policy_loss: -0.017328790989187028
          total_loss: 0.08446025202671686
          vf_explained_var: 0.18094037473201752
          vf_loss: 0.11323306971074393
    num_agent_steps_sampled: 689000
    num_agent_steps_trained: 689000
    num_steps_sampled: 689000
    num_steps_trained: 689000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,689,33628.7,689000,-0.9841,9.8,-16.55,346.37




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 690000
  custom_metrics: {}
  date: 2021-10-27_06-30-51
  done: false
  episode_len_mean: 341.17
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.9389999999999631
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 2
  episodes_total: 2531
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.76788121064504
          entropy_coeff: 0.009999999999999998
          kl: 0.01679837746319595
          policy_loss: 0.02172081189023124
          total_loss: 0.37080883619685967
          vf_explained_var: 0.7376570105552673
          vf_loss: 0.3505884306298362
    num_agent_steps_sampled: 690000
    num_agent_steps_trained: 690000
    num_steps_sampled: 690000
    num_steps_trained: 690000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,690,33665.4,690000,-0.939,9.8,-16.55,341.17




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 691000
  custom_metrics: {}
  date: 2021-10-27_06-31-48
  done: false
  episode_len_mean: 326.31
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.6598999999999644
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 4
  episodes_total: 2535
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.798192142115699
          entropy_coeff: 0.009999999999999998
          kl: 0.009605856347308163
          policy_loss: -0.08857068282862504
          total_loss: 0.3905190067158805
          vf_explained_var: 0.7948357462882996
          vf_loss: 0.48782026867071787
    num_agent_steps_sampled: 691000
    num_agent_steps_trained: 691000
    num_steps_sampled: 691000
    num_steps_trained: 691000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,691,33722.5,691000,-0.6599,9.8,-16.55,326.31


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 692000
  custom_metrics: {}
  date: 2021-10-27_06-32-05
  done: false
  episode_len_mean: 330.71
  episode_media: {}
  episode_reward_max: 9.8
  episode_reward_mean: -0.773499999999963
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 2
  episodes_total: 2537
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.9726742294099595
          entropy_coeff: 0.009999999999999998
          kl: 0.006433253718623197
          policy_loss: -0.04014385239117675
          total_loss: 0.09295808294167121
          vf_explained_var: 0.47040700912475586
          vf_loss: 0.14663285387472974
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_steps_sampled: 692000
    num_steps_trained: 692000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,692,33739,692000,-0.7735,9.8,-16.55,330.71




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 693000
  custom_metrics: {}
  date: 2021-10-27_06-34-53
  done: false
  episode_len_mean: 296.31
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -0.11329999999996807
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 8
  episodes_total: 2545
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.738901674747467
          entropy_coeff: 0.009999999999999998
          kl: 0.011050175367577205
          policy_loss: -0.06097895304361979
          total_loss: 0.6533853554891216
          vf_explained_var: 0.569085419178009
          vf_loss: 0.7211109762390454
    num_agent_steps_sampled: 693000
    num_agent_steps_trained: 693000
    num_steps_sampled: 693000
    num_steps_trained: 693000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,693,33907.5,693000,-0.1133,9.83,-16.55,296.31




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 694000
  custom_metrics: {}
  date: 2021-10-27_06-35-53
  done: false
  episode_len_mean: 295.31
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -0.06929999999996844
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 4
  episodes_total: 2549
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.8044409698910184
          entropy_coeff: 0.009999999999999998
          kl: 0.008584411252221565
          policy_loss: -0.14549937637315857
          total_loss: 0.019253236800432207
          vf_explained_var: 0.818865954875946
          vf_loss: 0.17452943885388475
    num_agent_steps_sampled: 694000
    num_agent_steps_trained: 694000
    num_steps_sampled: 694000
    num_steps_trained: 694000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,694,33967.3,694000,-0.0693,9.83,-16.55,295.31




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 695000
  custom_metrics: {}
  date: 2021-10-27_06-38-09
  done: false
  episode_len_mean: 264.78
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 0.6448000000000276
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 8
  episodes_total: 2557
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.7059478627310858
          entropy_coeff: 0.009999999999999998
          kl: 0.012646876599681584
          policy_loss: 0.036088656220171184
          total_loss: 0.6570510551333427
          vf_explained_var: 0.8526515364646912
          vf_loss: 0.6258417603042391
    num_agent_steps_sampled: 695000
    num_agent_steps_trained: 695000
    num_steps_sampled: 695000
    num_steps_trained: 695000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,695,34102.7,695000,0.6448,9.84,-16.55,264.78




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 696000
  custom_metrics: {}
  date: 2021-10-27_06-39-21
  done: false
  episode_len_mean: 253.67
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 0.8992000000000256
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 4
  episodes_total: 2561
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.9381562617090013
          entropy_coeff: 0.009999999999999998
          kl: 0.006587357221642145
          policy_loss: 0.020509166684415606
          total_loss: 0.34248749253650507
          vf_explained_var: 0.7431150078773499
          vf_loss: 0.3350156499719661
    num_agent_steps_sampled: 696000
    num_agent_steps_trained: 696000
    num_steps_sampled: 696000
    num_steps_trained: 696000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,696,34175.1,696000,0.8992,9.84,-16.55,253.67




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 697000
  custom_metrics: {}
  date: 2021-10-27_06-39-57
  done: false
  episode_len_mean: 254.5
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 0.8937000000000255
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 2
  episodes_total: 2563
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.935840862327152
          entropy_coeff: 0.009999999999999998
          kl: 0.00826335086024401
          policy_loss: -0.06732040262884564
          total_loss: 0.13874624135593575
          vf_explained_var: 0.7688034772872925
          vf_loss: 0.21746667864111563
    num_agent_steps_sampled: 697000
    num_agent_steps_trained: 697000
    num_steps_sampled: 697000
    num_steps_trained: 697000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,697,34211.3,697000,0.8937,9.84,-16.55,254.5




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 698000
  custom_metrics: {}
  date: 2021-10-27_06-42-28
  done: false
  episode_len_mean: 230.77
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 1.4235000000000224
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 9
  episodes_total: 2572
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.866947399245368
          entropy_coeff: 0.009999999999999998
          kl: 0.011181511279640283
          policy_loss: 0.007141991125212775
          total_loss: 0.5127639980779753
          vf_explained_var: 0.3895585834980011
          vf_loss: 0.5135226407631611
    num_agent_steps_sampled: 698000
    num_agent_steps_trained: 698000
    num_steps_sampled: 698000
    num_steps_trained: 698000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,698,34362.5,698000,1.4235,9.84,-16.55,230.77




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 699000
  custom_metrics: {}
  date: 2021-10-27_06-44-03
  done: false
  episode_len_mean: 221.32
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 1.6028000000000207
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 5
  episodes_total: 2577
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9630929517222018
          cur_lr: 5.000000000000001e-05
          entropy: 1.8325038843684727
          entropy_coeff: 0.009999999999999998
          kl: 0.02882485621135247
          policy_loss: -0.0400610723429256
          total_loss: 0.43159455731511115
          vf_explained_var: 0.7034435868263245
          vf_loss: 0.4622196550998423
    num_agent_steps_sampled: 699000
    num_agent_steps_trained: 699000
    num_steps_sampled: 699000
    num_steps_trained: 699000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,699,34457.5,699000,1.6028,9.84,-16.55,221.32




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 700000
  custom_metrics: {}
  date: 2021-10-27_06-44-44
  done: false
  episode_len_mean: 231.3
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 1.4273000000000218
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 3
  episodes_total: 2580
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4446394275833025
          cur_lr: 5.000000000000001e-05
          entropy: 1.9192852179209392
          entropy_coeff: 0.009999999999999998
          kl: 0.011001882275053889
          policy_loss: -0.0014994765321413677
          total_loss: 0.23192674457612966
          vf_explained_var: 0.7356089353561401
          vf_loss: 0.23672531354758475
    num_agent_steps_sampled: 700000
    num_agent_steps_trained: 700000
    num_steps_sampled: 700000
    num_steps_trained: 700000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,700,34498.3,700000,1.4273,9.84,-16.55,231.3




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 701000
  custom_metrics: {}
  date: 2021-10-27_06-46-21
  done: false
  episode_len_mean: 231.27
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 1.3981000000000219
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 6
  episodes_total: 2586
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4446394275833025
          cur_lr: 5.000000000000001e-05
          entropy: 1.9589485552575854
          entropy_coeff: 0.009999999999999998
          kl: 0.005342268053399055
          policy_loss: 0.03249286909898122
          total_loss: 0.11093225520518091
          vf_explained_var: 0.6941036581993103
          vf_loss: 0.09031122506906589
    num_agent_steps_sampled: 701000
    num_agent_steps_trained: 701000
    num_steps_sampled: 701000
    num_steps_trained: 701000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,701,34595.3,701000,1.3981,9.84,-16.55,231.27




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 702000
  custom_metrics: {}
  date: 2021-10-27_06-48-15
  done: false
  episode_len_mean: 225.29
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 1.5088000000000208
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 7
  episodes_total: 2593
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4446394275833025
          cur_lr: 5.000000000000001e-05
          entropy: 1.802543694443173
          entropy_coeff: 0.009999999999999998
          kl: 0.009255306020844204
          policy_loss: 0.03670970714754528
          total_loss: 0.41344810290676026
          vf_explained_var: 0.3904972970485687
          vf_loss: 0.38139325338933205
    num_agent_steps_sampled: 702000
    num_agent_steps_trained: 702000
    num_steps_sampled: 702000
    num_steps_trained: 702000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,702,34708.5,702000,1.5088,9.84,-16.55,225.29


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 703000
  custom_metrics: {}
  date: 2021-10-27_06-48-35
  done: false
  episode_len_mean: 230.72
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 1.4566000000000212
  episode_reward_min: -16.549999999999905
  episodes_this_iter: 1
  episodes_total: 2594
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4446394275833025
          cur_lr: 5.000000000000001e-05
          entropy: 1.8237268447875976
          entropy_coeff: 0.009999999999999998
          kl: 0.004733394846409153
          policy_loss: 0.019756270986464288
          total_loss: 0.2619074409206708
          vf_explained_var: 0.6701077222824097
          vf_loss: 0.25355039010238317
    num_agent_steps_sampled: 703000
    num_agent_steps_trained: 703000
    num_steps_sampled: 703000
    num_steps_trained: 703000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,703,34729.1,703000,1.4566,9.84,-16.55,230.72




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 704000
  custom_metrics: {}
  date: 2021-10-27_06-52-27
  done: false
  episode_len_mean: 211.91
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 1.980500000000018
  episode_reward_min: -11.849999999999888
  episodes_this_iter: 13
  episodes_total: 2607
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7223197137916513
          cur_lr: 5.000000000000001e-05
          entropy: 1.6983348608016968
          entropy_coeff: 0.009999999999999998
          kl: 0.02700420856977445
          policy_loss: 0.055152416643169194
          total_loss: 1.0164231037100155
          vf_explained_var: 0.7760406732559204
          vf_loss: 0.9587483657730951
    num_agent_steps_sampled: 704000
    num_agent_steps_trained: 704000
    num_steps_sampled: 704000
    num_steps_trained: 704000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,704,34960.9,704000,1.9805,9.84,-11.85,211.91




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 705000
  custom_metrics: {}
  date: 2021-10-27_06-53-26
  done: false
  episode_len_mean: 217.78
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 1.9520000000000184
  episode_reward_min: -11.849999999999888
  episodes_this_iter: 3
  episodes_total: 2610
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0834795706874765
          cur_lr: 5.000000000000001e-05
          entropy: 1.9864767723613315
          entropy_coeff: 0.009999999999999998
          kl: 0.010007914815692824
          policy_loss: -0.08992382919208872
          total_loss: 0.2559319055794428
          vf_explained_var: 0.4410989582538605
          vf_loss: 0.35487712663469007
    num_agent_steps_sampled: 705000
    num_agent_steps_trained: 705000
    num_steps_sampled: 705000
    num_steps_trained: 705000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,705,35020.1,705000,1.952,9.84,-11.85,217.78




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 706000
  custom_metrics: {}
  date: 2021-10-27_06-56-36
  done: false
  episode_len_mean: 203.29
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 2.2974000000000174
  episode_reward_min: -11.849999999999888
  episodes_this_iter: 11
  episodes_total: 2621
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0834795706874765
          cur_lr: 5.000000000000001e-05
          entropy: 1.9398520827293395
          entropy_coeff: 0.009999999999999998
          kl: 0.008999892309340554
          policy_loss: 0.002968894276354048
          total_loss: 0.3946640650431315
          vf_explained_var: 0.6447061896324158
          vf_loss: 0.4013424867919336
    num_agent_steps_sampled: 706000
    num_agent_steps_trained: 706000
    num_steps_sampled: 706000
    num_steps_trained: 706000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,706,35209.8,706000,2.2974,9.84,-11.85,203.29




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 707000
  custom_metrics: {}
  date: 2021-10-27_06-57-57
  done: false
  episode_len_mean: 192.81
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 2.5077000000000176
  episode_reward_min: -11.849999999999888
  episodes_this_iter: 5
  episodes_total: 2626
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0834795706874765
          cur_lr: 5.000000000000001e-05
          entropy: 1.700221339861552
          entropy_coeff: 0.009999999999999998
          kl: 0.04137024904029343
          policy_loss: 0.12465812584592237
          total_loss: 0.8854792550206184
          vf_explained_var: 0.4800715446472168
          vf_loss: 0.7329995108975305
    num_agent_steps_sampled: 707000
    num_agent_steps_trained: 707000
    num_steps_sampled: 707000
    num_steps_trained: 707000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,707,35290.6,707000,2.5077,9.84,-11.85,192.81




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 708000
  custom_metrics: {}
  date: 2021-10-27_06-58-38
  done: false
  episode_len_mean: 192.46
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 2.463000000000017
  episode_reward_min: -11.849999999999888
  episodes_this_iter: 2
  episodes_total: 2628
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6252193560312151
          cur_lr: 5.000000000000001e-05
          entropy: 1.9317261656125386
          entropy_coeff: 0.009999999999999998
          kl: 0.004422798065093477
          policy_loss: 0.01274473046263059
          total_loss: 0.2949050074236261
          vf_explained_var: 0.3114481270313263
          vf_loss: 0.2942895198447837
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_steps_sampled: 708000
    num_steps_trained: 708000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,708,35331.3,708000,2.463,9.84,-11.85,192.46




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 709000
  custom_metrics: {}
  date: 2021-10-27_06-59-32
  done: false
  episode_len_mean: 185.92
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 2.8038000000000154
  episode_reward_min: -9.439999999999898
  episodes_this_iter: 4
  episodes_total: 2632
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8126096780156076
          cur_lr: 5.000000000000001e-05
          entropy: 2.0317314002248974
          entropy_coeff: 0.009999999999999998
          kl: 0.004280611522728033
          policy_loss: -0.08174333497881889
          total_loss: -0.012674666568636894
          vf_explained_var: 0.863280713558197
          vf_loss: 0.08590751788417239
    num_agent_steps_sampled: 709000
    num_agent_steps_trained: 709000
    num_steps_sampled: 709000
    num_steps_trained: 709000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,709,35386.1,709000,2.8038,9.84,-9.44,185.92


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 710000
  custom_metrics: {}
  date: 2021-10-27_06-59-54
  done: false
  episode_len_mean: 192.15
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 2.605000000000016
  episode_reward_min: -9.439999999999898
  episodes_this_iter: 2
  episodes_total: 2634
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 1.8573611153496636
          entropy_coeff: 0.009999999999999998
          kl: 0.016802576038401934
          policy_loss: 0.007771159294578764
          total_loss: 0.6145131144258711
          vf_explained_var: 0.3493298292160034
          vf_loss: 0.6184885970400905
    num_agent_steps_sampled: 710000
    num_agent_steps_trained: 710000
    num_steps_sampled: 710000
    num_steps_trained: 710000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,710,35407.5,710000,2.605,9.84,-9.44,192.15




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 711000
  custom_metrics: {}
  date: 2021-10-27_07-01-28
  done: false
  episode_len_mean: 182.82
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 2.765400000000015
  episode_reward_min: -9.439999999999898
  episodes_this_iter: 5
  episodes_total: 2639
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 1.9934691058264837
          entropy_coeff: 0.009999999999999998
          kl: 0.011385392406667734
          policy_loss: -0.0344393152743578
          total_loss: 0.17729424074706104
          vf_explained_var: 0.2460564523935318
          vf_loss: 0.22704230188392102
    num_agent_steps_sampled: 711000
    num_agent_steps_trained: 711000
    num_steps_sampled: 711000
    num_steps_trained: 711000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,711,35502.2,711000,2.7654,9.84,-9.44,182.82




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 712000
  custom_metrics: {}
  date: 2021-10-27_07-02-29
  done: false
  episode_len_mean: 193.54
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 2.581700000000016
  episode_reward_min: -9.439999999999898
  episodes_this_iter: 4
  episodes_total: 2643
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 1.8787417703204685
          entropy_coeff: 0.009999999999999998
          kl: 0.008194442705258058
          policy_loss: -0.03278174367215898
          total_loss: 0.2128481738269329
          vf_explained_var: 0.6141959428787231
          vf_loss: 0.26108789008317723
    num_agent_steps_sampled: 712000
    num_agent_steps_trained: 712000
    num_steps_sampled: 712000
    num_steps_trained: 712000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,712,35563.1,712000,2.5817,9.84,-9.44,193.54




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 713000
  custom_metrics: {}
  date: 2021-10-27_07-04-22
  done: false
  episode_len_mean: 186.97
  episode_media: {}
  episode_reward_max: 9.84
  episode_reward_mean: 2.722500000000016
  episode_reward_min: -9.439999999999898
  episodes_this_iter: 7
  episodes_total: 2650
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 1.9181078208817377
          entropy_coeff: 0.009999999999999998
          kl: 0.012518095258913349
          policy_loss: -0.013826393998331493
          total_loss: 0.33710721714629066
          vf_explained_var: 0.1785270720720291
          vf_loss: 0.365028523405393
    num_agent_steps_sampled: 713000
    num_agent_steps_trained: 713000
    num_steps_sampled: 713000
    num_steps_trained: 713000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,713,35675.6,713000,2.7225,9.84,-9.44,186.97




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 714000
  custom_metrics: {}
  date: 2021-10-27_07-08-44
  done: false
  episode_len_mean: 166.02
  episode_media: {}
  episode_reward_max: 9.82
  episode_reward_mean: 3.1399000000000132
  episode_reward_min: -9.439999999999898
  episodes_this_iter: 14
  episodes_total: 2664
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 1.7984726044866775
          entropy_coeff: 0.009999999999999998
          kl: 0.012442108640800214
          policy_loss: -0.11583531267113156
          total_loss: 0.0896812666621473
          vf_explained_var: 0.7594797015190125
          vf_loss: 0.21844601237939465
    num_agent_steps_sampled: 714000
    num_agent_steps_trained: 714000
    num_steps_sampled: 714000
    num_steps_trained: 714000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,714,35937.3,714000,3.1399,9.82,-9.44,166.02




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 715000
  custom_metrics: {}
  date: 2021-10-27_07-10-38
  done: false
  episode_len_mean: 170.21
  episode_media: {}
  episode_reward_max: 9.850000000000001
  episode_reward_mean: 3.105900000000014
  episode_reward_min: -9.439999999999898
  episodes_this_iter: 7
  episodes_total: 2671
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 2.179816256629096
          entropy_coeff: 0.009999999999999998
          kl: 0.011597126269801184
          policy_loss: 0.03521317119399706
          total_loss: 0.08817293850911988
          vf_explained_var: 0.3897024691104889
          vf_loss: 0.07004595741681341
    num_agent_steps_sampled: 715000
    num_agent_steps_trained: 715000
    num_steps_sampled: 715000
    num_steps_trained: 715000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,715,36051.8,715000,3.1059,9.85,-9.44,170.21




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 716000
  custom_metrics: {}
  date: 2021-10-27_07-11-53
  done: false
  episode_len_mean: 175.03
  episode_media: {}
  episode_reward_max: 9.850000000000001
  episode_reward_mean: 3.0320000000000142
  episode_reward_min: -9.439999999999898
  episodes_this_iter: 4
  episodes_total: 2675
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 2.058385287390815
          entropy_coeff: 0.009999999999999998
          kl: 0.015559234362657924
          policy_loss: -0.09076774418354035
          total_loss: 0.23373633246454928
          vf_explained_var: 0.39792415499687195
          vf_loss: 0.33876613988929116
    num_agent_steps_sampled: 716000
    num_agent_steps_trained: 716000
    num_steps_sampled: 716000
    num_steps_trained: 716000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,716,36126.6,716000,3.032,9.85,-9.44,175.03


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 717000
  custom_metrics: {}
  date: 2021-10-27_07-12-13
  done: false
  episode_len_mean: 181.56
  episode_media: {}
  episode_reward_max: 9.850000000000001
  episode_reward_mean: 2.9266000000000143
  episode_reward_min: -9.439999999999898
  episodes_this_iter: 2
  episodes_total: 2677
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 2.139799486266242
          entropy_coeff: 0.009999999999999998
          kl: 0.007120471064441889
          policy_loss: -0.04696333466304673
          total_loss: 0.02492158862037791
          vf_explained_var: 0.6038731932640076
          vf_loss: 0.09038984018067518
    num_agent_steps_sampled: 717000
    num_agent_steps_trained: 717000
    num_steps_sampled: 717000
    num_steps_trained: 717000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,717,36146.7,717000,2.9266,9.85,-9.44,181.56




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 718000
  custom_metrics: {}
  date: 2021-10-27_07-14-04
  done: false
  episode_len_mean: 170.56
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.241000000000014
  episode_reward_min: -6.959999999999947
  episodes_this_iter: 7
  episodes_total: 2684
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 2.1500759959220885
          entropy_coeff: 0.009999999999999998
          kl: 0.006215196098207541
          policy_loss: 0.04995158513387044
          total_loss: 0.07335389355818431
          vf_explained_var: 0.6562740802764893
          vf_loss: 0.042377802733002075
    num_agent_steps_sampled: 718000
    num_agent_steps_trained: 718000
    num_steps_sampled: 718000
    num_steps_trained: 718000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,718,36257.6,718000,3.241,9.92,-6.96,170.56


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 719000
  custom_metrics: {}
  date: 2021-10-27_07-14-24
  done: false
  episode_len_mean: 181.19
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 3.0370000000000146
  episode_reward_min: -6.959999999999947
  episodes_this_iter: 2
  episodes_total: 2686
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 2.1947734938727486
          entropy_coeff: 0.009999999999999998
          kl: 0.007454553215171522
          policy_loss: -0.01265585840576225
          total_loss: 0.08251333816183938
          vf_explained_var: 0.0809330865740776
          vf_loss: 0.11408811169159082
    num_agent_steps_sampled: 719000
    num_agent_steps_trained: 719000
    num_steps_sampled: 719000
    num_steps_trained: 719000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,719,36277.2,719000,3.037,9.92,-6.96,181.19


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 720000
  custom_metrics: {}
  date: 2021-10-27_07-14-43
  done: false
  episode_len_mean: 184.52
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.9856000000000154
  episode_reward_min: -6.959999999999947
  episodes_this_iter: 1
  episodes_total: 2687
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 2.0824122813012864
          entropy_coeff: 0.009999999999999998
          kl: 0.012090687474139215
          policy_loss: -0.0077882569283247
          total_loss: 0.23963862732052804
          vf_explained_var: 0.4019591808319092
          vf_loss: 0.26333850373565737
    num_agent_steps_sampled: 720000
    num_agent_steps_trained: 720000
    num_steps_sampled: 720000
    num_steps_trained: 720000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,720,36296.4,720000,2.9856,9.92,-6.96,184.52


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 721000
  custom_metrics: {}
  date: 2021-10-27_07-15-01
  done: false
  episode_len_mean: 189.11
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.844100000000017
  episode_reward_min: -6.959999999999947
  episodes_this_iter: 2
  episodes_total: 2689
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 2.0588588370217216
          entropy_coeff: 0.009999999999999998
          kl: 0.013989775468915416
          policy_loss: 0.013371829522980584
          total_loss: 0.16438154987990855
          vf_explained_var: 0.5487565994262695
          vf_loss: 0.16591419581737782
    num_agent_steps_sampled: 721000
    num_agent_steps_trained: 721000
    num_steps_sampled: 721000
    num_steps_trained: 721000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,721,36314.6,721000,2.8441,9.92,-6.96,189.11




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 722000
  custom_metrics: {}
  date: 2021-10-27_07-16-51
  done: false
  episode_len_mean: 188.82
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.9050000000000167
  episode_reward_min: -6.959999999999947
  episodes_this_iter: 7
  episodes_total: 2696
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 2.145207138856252
          entropy_coeff: 0.009999999999999998
          kl: 0.006522062470496327
          policy_loss: 0.19901537067360348
          total_loss: 0.23075979070530997
          vf_explained_var: 0.5727096796035767
          vf_loss: 0.050546546984050006
    num_agent_steps_sampled: 722000
    num_agent_steps_trained: 722000
    num_steps_sampled: 722000
    num_steps_trained: 722000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,722,36424.6,722000,2.905,9.92,-6.96,188.82




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 723000
  custom_metrics: {}
  date: 2021-10-27_07-17-34
  done: false
  episode_len_mean: 193.96
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.7650000000000183
  episode_reward_min: -6.959999999999947
  episodes_this_iter: 3
  episodes_total: 2699
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 1.810859089427524
          entropy_coeff: 0.009999999999999998
          kl: 0.009608062201040192
          policy_loss: -0.20424191719955867
          total_loss: -0.0016555264592170715
          vf_explained_var: 0.6615210771560669
          vf_loss: 0.21679118434484634
    num_agent_steps_sampled: 723000
    num_agent_steps_trained: 723000
    num_steps_sampled: 723000
    num_steps_trained: 723000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,723,36467.6,723000,2.765,9.92,-6.96,193.96


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 724000
  custom_metrics: {}
  date: 2021-10-27_07-17-52
  done: false
  episode_len_mean: 198.76
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.6377000000000193
  episode_reward_min: -7.239999999999912
  episodes_this_iter: 1
  episodes_total: 2700
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4063048390078038
          cur_lr: 5.000000000000001e-05
          entropy: 2.0522508475515577
          entropy_coeff: 0.009999999999999998
          kl: 0.022714232490879956
          policy_loss: -0.011901494943433337
          total_loss: 0.28174048070278435
          vf_explained_var: 0.4891766905784607
          vf_loss: 0.3049355829755465
    num_agent_steps_sampled: 724000
    num_agent_steps_trained: 724000
    num_steps_sampled: 724000
    num_steps_trained: 724000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,724,36485.9,724000,2.6377,9.92,-7.24,198.76




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 725000
  custom_metrics: {}
  date: 2021-10-27_07-20-20
  done: false
  episode_len_mean: 203.65
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.4894000000000203
  episode_reward_min: -7.239999999999912
  episodes_this_iter: 9
  episodes_total: 2709
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6094572585117055
          cur_lr: 5.000000000000001e-05
          entropy: 2.1503883547253078
          entropy_coeff: 0.009999999999999998
          kl: 0.010081878347757142
          policy_loss: 0.1592652339902189
          total_loss: 0.2653091029988395
          vf_explained_var: 0.7540789842605591
          vf_loss: 0.12140327787233723
    num_agent_steps_sampled: 725000
    num_agent_steps_trained: 725000
    num_steps_sampled: 725000
    num_steps_trained: 725000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,725,36633.6,725000,2.4894,9.92,-7.24,203.65




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 726000
  custom_metrics: {}
  date: 2021-10-27_07-21-55
  done: false
  episode_len_mean: 204.83
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.42400000000002
  episode_reward_min: -7.239999999999912
  episodes_this_iter: 5
  episodes_total: 2714
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6094572585117055
          cur_lr: 5.000000000000001e-05
          entropy: 2.0564845111634997
          entropy_coeff: 0.009999999999999998
          kl: 0.010903669723092611
          policy_loss: -0.05735332284950548
          total_loss: 0.18341700616810058
          vf_explained_var: 0.7807391881942749
          vf_loss: 0.25468985291404855
    num_agent_steps_sampled: 726000
    num_agent_steps_trained: 726000
    num_steps_sampled: 726000
    num_steps_trained: 726000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,726,36728.5,726000,2.424,9.92,-7.24,204.83


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 727000
  custom_metrics: {}
  date: 2021-10-27_07-22-14
  done: false
  episode_len_mean: 216.95
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.1375000000000215
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 2
  episodes_total: 2716
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6094572585117055
          cur_lr: 5.000000000000001e-05
          entropy: 1.054129378663169
          entropy_coeff: 0.009999999999999998
          kl: 0.015149213767432742
          policy_loss: -0.10249002522064579
          total_loss: 0.14471476835509142
          vf_explained_var: 0.524273693561554
          vf_loss: 0.24851329144504336
    num_agent_steps_sampled: 727000
    num_agent_steps_trained: 727000
    num_steps_sampled: 727000
    num_steps_trained: 727000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,727,36747.2,727000,2.1375,9.92,-14.69,216.95


Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 728000
  custom_metrics: {}
  date: 2021-10-27_07-22-32
  done: false
  episode_len_mean: 227.35
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 1.9553000000000227
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 2
  episodes_total: 2718
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6094572585117055
          cur_lr: 5.000000000000001e-05
          entropy: 2.0961328744888306
          entropy_coeff: 0.009999999999999998
          kl: 0.006986476958237967
          policy_loss: -0.06829314322935211
          total_loss: 0.041580950675739185
          vf_explained_var: 0.462147980928421
          vf_loss: 0.12657746368398268
    num_agent_steps_sampled: 728000
    num_agent_steps_trained: 728000
    num_steps_sampled: 728000
    num_steps_trained: 728000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,728,36765.3,728000,1.9553,9.92,-14.69,227.35




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 729000
  custom_metrics: {}
  date: 2021-10-27_07-24-06
  done: false
  episode_len_mean: 219.25
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 1.982500000000022
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 5
  episodes_total: 2723
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6094572585117055
          cur_lr: 5.000000000000001e-05
          entropy: 1.8294503331184386
          entropy_coeff: 0.009999999999999998
          kl: 0.0395376857755606
          policy_loss: 0.009085222168101205
          total_loss: 0.5538960584335857
          vf_explained_var: 0.6538557410240173
          vf_loss: 0.5390088039967749
    num_agent_steps_sampled: 729000
    num_agent_steps_trained: 729000
    num_steps_sampled: 729000
    num_steps_trained: 729000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,729,36859.2,729000,1.9825,9.92,-14.69,219.25




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 730000
  custom_metrics: {}
  date: 2021-10-27_07-26-58
  done: false
  episode_len_mean: 205.57
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.1854000000000213
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 10
  episodes_total: 2733
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9141858877675587
          cur_lr: 5.000000000000001e-05
          entropy: 2.1080799327956306
          entropy_coeff: 0.009999999999999998
          kl: 0.009381430031458965
          policy_loss: 0.08796524107456208
          total_loss: 0.20447831758194498
          vf_explained_var: 0.8600515723228455
          vf_loss: 0.12901750243165427
    num_agent_steps_sampled: 730000
    num_agent_steps_trained: 730000
    num_steps_sampled: 730000
    num_steps_trained: 730000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,730,37031.2,730000,2.1854,9.92,-14.69,205.57




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 731000
  custom_metrics: {}
  date: 2021-10-27_07-27-55
  done: false
  episode_len_mean: 205.84
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.246900000000021
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 4
  episodes_total: 2737
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9141858877675587
          cur_lr: 5.000000000000001e-05
          entropy: 2.1533545838461983
          entropy_coeff: 0.009999999999999998
          kl: 0.009345377839099249
          policy_loss: 0.05358679696089692
          total_loss: 0.16919915891355938
          vf_explained_var: 0.6090323328971863
          vf_loss: 0.12860249512725405
    num_agent_steps_sampled: 731000
    num_agent_steps_trained: 731000
    num_steps_sampled: 731000
    num_steps_trained: 731000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,731,37088.7,731000,2.2469,9.92,-14.69,205.84




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 732000
  custom_metrics: {}
  date: 2021-10-27_07-29-49
  done: false
  episode_len_mean: 199.24
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.4418000000000193
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 6
  episodes_total: 2743
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9141858877675587
          cur_lr: 5.000000000000001e-05
          entropy: 1.7579672469033136
          entropy_coeff: 0.009999999999999998
          kl: 0.014071166289565657
          policy_loss: -0.03140449523925781
          total_loss: 0.30538491333524387
          vf_explained_var: 0.7761499285697937
          vf_loss: 0.34150542285707264
    num_agent_steps_sampled: 732000
    num_agent_steps_trained: 732000
    num_steps_sampled: 732000
    num_steps_trained: 732000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,732,37201.9,732000,2.4418,9.92,-14.69,199.24




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 733000
  custom_metrics: {}
  date: 2021-10-27_07-31-07
  done: false
  episode_len_mean: 205.61
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.31340000000002
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 5
  episodes_total: 2748
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9141858877675587
          cur_lr: 5.000000000000001e-05
          entropy: 1.968635728624132
          entropy_coeff: 0.009999999999999998
          kl: 0.016693687215163584
          policy_loss: 0.005043549877074029
          total_loss: 0.6485633505715265
          vf_explained_var: 0.5911524295806885
          vf_loss: 0.6479450298680199
    num_agent_steps_sampled: 733000
    num_agent_steps_trained: 733000
    num_steps_sampled: 733000
    num_steps_trained: 733000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,733,37280.2,733000,2.3134,9.92,-14.69,205.61




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 734000
  custom_metrics: {}
  date: 2021-10-27_07-33-21
  done: false
  episode_len_mean: 202.8
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.4724000000000195
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 8
  episodes_total: 2756
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9141858877675587
          cur_lr: 5.000000000000001e-05
          entropy: 1.9440480788548788
          entropy_coeff: 0.009999999999999998
          kl: 0.015688442157895822
          policy_loss: -0.13469574517673916
          total_loss: 0.12121579895416895
          vf_explained_var: 0.651210606098175
          vf_loss: 0.2610098714629809
    num_agent_steps_sampled: 734000
    num_agent_steps_trained: 734000
    num_steps_sampled: 734000
    num_steps_trained: 734000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,734,37414.3,734000,2.4724,9.92,-14.69,202.8




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 735000
  custom_metrics: {}
  date: 2021-10-27_07-35-00
  done: false
  episode_len_mean: 212.27
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.35500000000002
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 6
  episodes_total: 2762
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9141858877675587
          cur_lr: 5.000000000000001e-05
          entropy: 1.9672598454687331
          entropy_coeff: 0.009999999999999998
          kl: 0.0047801995795878505
          policy_loss: -0.0021758156311180855
          total_loss: 0.0932696555637651
          vf_explained_var: 0.8771694898605347
          vf_loss: 0.1107480781359805
    num_agent_steps_sampled: 735000
    num_agent_steps_trained: 735000
    num_steps_sampled: 735000
    num_steps_trained: 735000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,735,37513.3,735000,2.355,9.92,-14.69,212.27




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 736000
  custom_metrics: {}
  date: 2021-10-27_07-38-33
  done: false
  episode_len_mean: 203.86
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.493700000000019
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 12
  episodes_total: 2774
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45709294388377936
          cur_lr: 5.000000000000001e-05
          entropy: 2.059646240870158
          entropy_coeff: 0.009999999999999998
          kl: 0.010465863910445903
          policy_loss: 0.21376775279641153
          total_loss: 0.3257918042441209
          vf_explained_var: 0.5918826460838318
          vf_loss: 0.12783664042750995
    num_agent_steps_sampled: 736000
    num_agent_steps_trained: 736000
    num_steps_sampled: 736000
    num_steps_trained: 736000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,736,37726.6,736000,2.4937,9.92,-14.69,203.86




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 737000
  custom_metrics: {}
  date: 2021-10-27_07-41-07
  done: false
  episode_len_mean: 187.72
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 2.892000000000017
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 8
  episodes_total: 2782
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45709294388377936
          cur_lr: 5.000000000000001e-05
          entropy: 1.9620773315429687
          entropy_coeff: 0.009999999999999998
          kl: 0.021592750277414356
          policy_loss: -0.054062761242191
          total_loss: 0.6531439579195446
          vf_explained_var: 0.7974109053611755
          vf_loss: 0.7169576042228275
    num_agent_steps_sampled: 737000
    num_agent_steps_trained: 737000
    num_steps_sampled: 737000
    num_steps_trained: 737000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,737,37880.6,737000,2.892,9.92,-14.69,187.72




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 738000
  custom_metrics: {}
  date: 2021-10-27_07-43-08
  done: false
  episode_len_mean: 168.8
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 3.337100000000015
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 7
  episodes_total: 2789
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.685639415825669
          cur_lr: 5.000000000000001e-05
          entropy: 1.53244336909718
          entropy_coeff: 0.009999999999999998
          kl: 0.012591112457372274
          policy_loss: 0.02763838627272182
          total_loss: 0.45827020530899365
          vf_explained_var: 0.773103654384613
          vf_loss: 0.4373232910202609
    num_agent_steps_sampled: 738000
    num_agent_steps_trained: 738000
    num_steps_sampled: 738000
    num_steps_trained: 738000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,738,38001.6,738000,3.3371,9.91,-14.69,168.8




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 739000
  custom_metrics: {}
  date: 2021-10-27_07-44-06
  done: false
  episode_len_mean: 167.54
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 3.411000000000014
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 4
  episodes_total: 2793
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.685639415825669
          cur_lr: 5.000000000000001e-05
          entropy: 2.060557406478458
          entropy_coeff: 0.009999999999999998
          kl: 0.020480916479152376
          policy_loss: -0.018849377644558747
          total_loss: 0.5009258433348602
          vf_explained_var: 0.7030490636825562
          vf_loss: 0.5263382744044065
    num_agent_steps_sampled: 739000
    num_agent_steps_trained: 739000
    num_steps_sampled: 739000
    num_steps_trained: 739000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,739,38058.7,739000,3.411,9.91,-14.69,167.54




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 740000
  custom_metrics: {}
  date: 2021-10-27_07-49-31
  done: false
  episode_len_mean: 145.03
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 3.9193000000000113
  episode_reward_min: -14.6899999999999
  episodes_this_iter: 18
  episodes_total: 2811
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.7921739803420174
          entropy_coeff: 0.009999999999999998
          kl: 0.013910682881626732
          policy_loss: 0.12955369071827993
          total_loss: 0.6645222745007939
          vf_explained_var: 0.7185719609260559
          vf_loss: 0.5385837554931641
    num_agent_steps_sampled: 740000
    num_agent_steps_trained: 740000
    num_steps_sampled: 740000
    num_steps_trained: 740000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,740,38383.7,740000,3.9193,9.91,-14.69,145.03




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 741000
  custom_metrics: {}
  date: 2021-10-27_07-56-21
  done: false
  episode_len_mean: 111.18
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 4.641100000000008
  episode_reward_min: -5.479999999999963
  episodes_this_iter: 21
  episodes_total: 2832
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.2216966403855218
          entropy_coeff: 0.009999999999999998
          kl: 0.0166178440213801
          policy_loss: -0.12588545928398767
          total_loss: 0.6401444392899672
          vf_explained_var: 0.8098100423812866
          vf_loss: 0.7611560934119754
    num_agent_steps_sampled: 741000
    num_agent_steps_trained: 741000
    num_steps_sampled: 741000
    num_steps_trained: 741000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,741,38793.7,741000,4.6411,9.92,-5.48,111.18




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 742000
  custom_metrics: {}
  date: 2021-10-27_07-58-53
  done: false
  episode_len_mean: 101.35
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 4.910300000000006
  episode_reward_min: -4.779999999999942
  episodes_this_iter: 8
  episodes_total: 2840
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.992130524582333
          entropy_coeff: 0.009999999999999998
          kl: 0.008910946412724977
          policy_loss: -0.021373681227366128
          total_loss: 0.1745378517028358
          vf_explained_var: 0.6921438574790955
          vf_loss: 0.20666829256547822
    num_agent_steps_sampled: 742000
    num_agent_steps_trained: 742000
    num_steps_sampled: 742000
    num_steps_trained: 742000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,742,38945.7,742000,4.9103,9.92,-4.78,101.35




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 743000
  custom_metrics: {}
  date: 2021-10-27_08-03-57
  done: false
  episode_len_mean: 87.33
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 5.122200000000005
  episode_reward_min: -4.679999999999936
  episodes_this_iter: 17
  episodes_total: 2857
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.6677563124232822
          entropy_coeff: 0.009999999999999998
          kl: 0.012100414294178914
          policy_loss: -0.021371917095449236
          total_loss: 0.449612835711903
          vf_explained_var: 0.6410874724388123
          vf_loss: 0.4752175337738461
    num_agent_steps_sampled: 743000
    num_agent_steps_trained: 743000
    num_steps_sampled: 743000
    num_steps_trained: 743000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,743,39250.1,743000,5.1222,9.92,-4.68,87.33




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 744000
  custom_metrics: {}
  date: 2021-10-27_08-11-53
  done: false
  episode_len_mean: 69.54
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 5.693000000000001
  episode_reward_min: -4.679999999999936
  episodes_this_iter: 24
  episodes_total: 2881
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.5741184923383924
          entropy_coeff: 0.009999999999999998
          kl: 0.010281340760209387
          policy_loss: -0.10389486948649089
          total_loss: 0.26603835026423134
          vf_explained_var: 0.9427388310432434
          vf_loss: 0.3751004661122958
    num_agent_steps_sampled: 744000
    num_agent_steps_trained: 744000
    num_steps_sampled: 744000
    num_steps_trained: 744000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,744,39725.5,744000,5.693,9.93,-4.68,69.54




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 745000
  custom_metrics: {}
  date: 2021-10-27_08-13-28
  done: false
  episode_len_mean: 70.14
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 5.755200000000005
  episode_reward_min: -4.679999999999936
  episodes_this_iter: 6
  episodes_total: 2887
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 0.7730942659907871
          entropy_coeff: 0.009999999999999998
          kl: 0.017943060230685544
          policy_loss: 0.018055709016819795
          total_loss: 0.9281984562675158
          vf_explained_var: 0.5373560190200806
          vf_loss: 0.8994199835591846
    num_agent_steps_sampled: 745000
    num_agent_steps_trained: 745000
    num_steps_sampled: 745000
    num_steps_trained: 745000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,745,39820.9,745000,5.7552,9.93,-4.68,70.14




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 746000
  custom_metrics: {}
  date: 2021-10-27_08-24-18
  done: false
  episode_len_mean: 55.67
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.125400000000004
  episode_reward_min: -13.439999999999907
  episodes_this_iter: 34
  episodes_total: 2921
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.2180959781010945
          entropy_coeff: 0.009999999999999998
          kl: 0.009928658851891786
          policy_loss: -0.01989862819512685
          total_loss: 0.4501788259794315
          vf_explained_var: 0.9155179262161255
          vf_loss: 0.4720472087462743
    num_agent_steps_sampled: 746000
    num_agent_steps_trained: 746000
    num_steps_sampled: 746000
    num_steps_trained: 746000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,746,40470.7,746000,6.1254,9.93,-13.44,55.67




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 747000
  custom_metrics: {}
  date: 2021-10-27_08-31-57
  done: false
  episode_len_mean: 47.46
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.124500000000004
  episode_reward_min: -13.439999999999907
  episodes_this_iter: 24
  episodes_total: 2945
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.6463824894693162
          entropy_coeff: 0.009999999999999998
          kl: 0.013423880131678424
          policy_loss: 0.04819792484243711
          total_loss: 0.640614265203476
          vf_explained_var: 0.7275803685188293
          vf_loss: 0.5950742509629992
    num_agent_steps_sampled: 747000
    num_agent_steps_trained: 747000
    num_steps_sampled: 747000
    num_steps_trained: 747000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,747,40930.2,747000,6.1245,9.93,-13.44,47.46




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 748000
  custom_metrics: {}
  date: 2021-10-27_08-37-06
  done: false
  episode_len_mean: 47.77
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.104000000000003
  episode_reward_min: -13.439999999999907
  episodes_this_iter: 16
  episodes_total: 2961
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.6931263433562385
          entropy_coeff: 0.009999999999999998
          kl: 0.009083431561483144
          policy_loss: -0.05648494238654773
          total_loss: 0.4655565997792615
          vf_explained_var: 0.6714658737182617
          vf_loss: 0.5296308649910821
    num_agent_steps_sampled: 748000
    num_agent_steps_trained: 748000
    num_steps_sampled: 748000
    num_steps_trained: 748000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,748,41238.6,748000,6.104,9.93,-13.44,47.77




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 749000
  custom_metrics: {}
  date: 2021-10-27_08-45-41
  done: false
  episode_len_mean: 36.99
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 6.259400000000002
  episode_reward_min: -6.769999999999918
  episodes_this_iter: 27
  episodes_total: 2988
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.572575909561581
          entropy_coeff: 0.009999999999999998
          kl: 0.007792227176334008
          policy_loss: -0.15502512355645498
          total_loss: 0.05584768628080686
          vf_explained_var: 0.6487732529640198
          vf_loss: 0.2185845836997032
    num_agent_steps_sampled: 749000
    num_agent_steps_trained: 749000
    num_steps_sampled: 749000
    num_steps_trained: 749000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,749,41753.9,749000,6.2594,9.92,-6.77,36.99




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 750000
  custom_metrics: {}
  date: 2021-10-27_08-53-19
  done: false
  episode_len_mean: 42.32
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.171800000000003
  episode_reward_min: -6.769999999999918
  episodes_this_iter: 24
  episodes_total: 3012
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.7435408552487692
          entropy_coeff: 0.009999999999999998
          kl: 0.008761329437059541
          policy_loss: 0.06898166769080692
          total_loss: 0.41190418668298256
          vf_explained_var: 0.742293119430542
          vf_loss: 0.35134725632766883
    num_agent_steps_sampled: 750000
    num_agent_steps_trained: 750000
    num_steps_sampled: 750000
    num_steps_trained: 750000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,750,42211.8,750000,6.1718,9.93,-6.77,42.32




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 751000
  custom_metrics: {}
  date: 2021-10-27_09-00-16
  done: false
  episode_len_mean: 43.3
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.299000000000001
  episode_reward_min: -6.769999999999918
  episodes_this_iter: 22
  episodes_total: 3034
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.705246619383494
          entropy_coeff: 0.009999999999999998
          kl: 0.0055842168169287255
          policy_loss: 0.016472029272052977
          total_loss: 0.20134410055147278
          vf_explained_var: 0.7280028462409973
          vf_loss: 0.1961813996028569
    num_agent_steps_sampled: 751000
    num_agent_steps_trained: 751000
    num_steps_sampled: 751000
    num_steps_trained: 751000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,751,42629.3,751000,6.299,9.93,-6.77,43.3




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 752000
  custom_metrics: {}
  date: 2021-10-27_09-15-04
  done: false
  episode_len_mean: 31.93
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.3004999999999995
  episode_reward_min: -2.8399999999999817
  episodes_this_iter: 46
  episodes_total: 3080
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.2037145945760939
          entropy_coeff: 0.009999999999999998
          kl: 0.006237258920978306
          policy_loss: -0.1570768487950166
          total_loss: 0.17006737250420784
          vf_explained_var: 0.9406247735023499
          vf_loss: 0.33276660442352296
    num_agent_steps_sampled: 752000
    num_agent_steps_trained: 752000
    num_steps_sampled: 752000
    num_steps_trained: 752000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,752,43516.8,752000,6.3005,9.93,-2.84,31.93




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 753000
  custom_metrics: {}
  date: 2021-10-27_09-24-49
  done: false
  episode_len_mean: 30.13
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.3027999999999995
  episode_reward_min: -2.269999999999985
  episodes_this_iter: 31
  episodes_total: 3111
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.6424958321783278
          entropy_coeff: 0.009999999999999998
          kl: 0.007804187516406063
          policy_loss: 0.12148361139827304
          total_loss: 0.2686003998749786
          vf_explained_var: 0.7055225372314453
          vf_loss: 0.1555154585176044
    num_agent_steps_sampled: 753000
    num_agent_steps_trained: 753000
    num_steps_sampled: 753000
    num_steps_trained: 753000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,753,44102.3,753000,6.3028,9.93,-2.27,30.13




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 754000
  custom_metrics: {}
  date: 2021-10-27_09-33-05
  done: false
  episode_len_mean: 25.45
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.559199999999999
  episode_reward_min: -1.969999999999987
  episodes_this_iter: 25
  episodes_total: 3136
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.3808399240175884
          entropy_coeff: 0.009999999999999998
          kl: 0.010127520928129267
          policy_loss: -0.06347404496951235
          total_loss: 0.5071262286769019
          vf_explained_var: 0.9375001788139343
          vf_loss: 0.5739929325050778
    num_agent_steps_sampled: 754000
    num_agent_steps_trained: 754000
    num_steps_sampled: 754000
    num_steps_trained: 754000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,754,44597.9,754000,6.5592,9.93,-1.97,25.45




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 755000
  custom_metrics: {}
  date: 2021-10-27_09-51-04
  done: false
  episode_len_mean: 22.57
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 6.770200000000001
  episode_reward_min: -8.749999999999941
  episodes_this_iter: 57
  episodes_total: 3193
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.106926425298055
          entropy_coeff: 0.009999999999999998
          kl: 0.009805262302564977
          policy_loss: -0.010308127767509884
          total_loss: 0.3862929257667727
          vf_explained_var: 0.9436254501342773
          vf_loss: 0.39758600923750137
    num_agent_steps_sampled: 755000
    num_agent_steps_trained: 755000
    num_steps_sampled: 755000
    num_steps_trained: 755000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,755,45676.7,755000,6.7702,9.94,-8.75,22.57




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 756000
  custom_metrics: {}
  date: 2021-10-27_10-01-50
  done: false
  episode_len_mean: 23.37
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 6.533100000000002
  episode_reward_min: -8.749999999999941
  episodes_this_iter: 33
  episodes_total: 3226
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.3327583743466271
          entropy_coeff: 0.009999999999999998
          kl: 0.00858303467276621
          policy_loss: -0.1677325223882993
          total_loss: 0.34670161364807023
          vf_explained_var: 0.8935757279396057
          vf_loss: 0.518934428691864
    num_agent_steps_sampled: 756000
    num_agent_steps_trained: 756000
    num_steps_sampled: 756000
    num_steps_trained: 756000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,756,46323.2,756000,6.5331,9.94,-8.75,23.37




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 757000
  custom_metrics: {}
  date: 2021-10-27_10-11-19
  done: false
  episode_len_mean: 22.58
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 6.384100000000001
  episode_reward_min: -8.859999999999916
  episodes_this_iter: 30
  episodes_total: 3256
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0284591237385035
          cur_lr: 5.000000000000001e-05
          entropy: 1.1776649514834086
          entropy_coeff: 0.009999999999999998
          kl: 0.004960693482602628
          policy_loss: -0.13355912276440196
          total_loss: 0.04280459417237176
          vf_explained_var: 0.962972104549408
          vf_loss: 0.18303849757131602
    num_agent_steps_sampled: 757000
    num_agent_steps_trained: 757000
    num_steps_sampled: 757000
    num_steps_trained: 757000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,757,46892,757000,6.3841,9.94,-8.86,22.58




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 758000
  custom_metrics: {}
  date: 2021-10-27_10-18-50
  done: false
  episode_len_mean: 28.57
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 6.325100000000001
  episode_reward_min: -8.859999999999916
  episodes_this_iter: 24
  episodes_total: 3280
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5142295618692517
          cur_lr: 5.000000000000001e-05
          entropy: 0.6582619398832321
          entropy_coeff: 0.009999999999999998
          kl: 0.004313865618542955
          policy_loss: -0.08758004539542728
          total_loss: 0.15103682238194677
          vf_explained_var: 0.7271302938461304
          vf_loss: 0.2429811742570665
    num_agent_steps_sampled: 758000
    num_agent_steps_trained: 758000
    num_steps_sampled: 758000
    num_steps_trained: 758000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,758,47343.1,758000,6.3251,9.94,-8.86,28.57




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 759000
  custom_metrics: {}
  date: 2021-10-27_10-28-40
  done: false
  episode_len_mean: 35.15
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.469900000000003
  episode_reward_min: -8.859999999999916
  episodes_this_iter: 31
  episodes_total: 3311
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.5731874045398501
          entropy_coeff: 0.009999999999999998
          kl: 0.0059890003338760895
          policy_loss: -0.004597075697448518
          total_loss: 0.1565076174835364
          vf_explained_var: 0.5778098106384277
          vf_loss: 0.16529670912358496
    num_agent_steps_sampled: 759000
    num_agent_steps_trained: 759000
    num_steps_sampled: 759000
    num_steps_trained: 759000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,759,47932.6,759000,6.4699,9.93,-8.86,35.15




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 760000
  custom_metrics: {}
  date: 2021-10-27_10-46-26
  done: false
  episode_len_mean: 25.51
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.431600000000002
  episode_reward_min: -7.569999999999883
  episodes_this_iter: 55
  episodes_total: 3366
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.9162454254097409
          entropy_coeff: 0.009999999999999998
          kl: 0.014657706737238247
          policy_loss: 0.027333931831849945
          total_loss: 0.37570661641657355
          vf_explained_var: 0.9413928389549255
          vf_loss: 0.353766429093149
    num_agent_steps_sampled: 760000
    num_agent_steps_trained: 760000
    num_steps_sampled: 760000
    num_steps_trained: 760000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,760,48999.1,760000,6.4316,9.93,-7.57,25.51




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 761000
  custom_metrics: {}
  date: 2021-10-27_11-08-37
  done: false
  episode_len_mean: 14.23
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.5298
  episode_reward_min: 3.9
  episodes_this_iter: 69
  episodes_total: 3435
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.9381727370950911
          entropy_coeff: 0.009999999999999998
          kl: 0.011240292624796026
          policy_loss: 0.011750735425286823
          total_loss: 0.23806521706283093
          vf_explained_var: 0.9613524675369263
          vf_loss: 0.23280616501967114
    num_agent_steps_sampled: 761000
    num_agent_steps_trained: 761000
    num_steps_sampled: 761000
    num_steps_trained: 761000
  iterations_since_restore: 761
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,761,50329.3,761000,6.5298,9.93,3.9,14.23




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 762000
  custom_metrics: {}
  date: 2021-10-27_11-33-02
  done: false
  episode_len_mean: 13.68
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.7549
  episode_reward_min: 3.9
  episodes_this_iter: 76
  episodes_total: 3511
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.7022062109576331
          entropy_coeff: 0.009999999999999998
          kl: 0.008772746244168792
          policy_loss: 0.011485522985458374
          total_loss: 0.15635514474577375
          vf_explained_var: 0.9790595769882202
          vf_loss: 0.14963608160614966
    num_agent_steps_sampled: 762000
    num_agent_steps_trained: 762000
    num_steps_sampled: 762000
    num_steps_trained: 762000
  iterations_since_restore: 762
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,762,51795.1,762000,6.7549,9.93,3.9,13.68




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 763000
  custom_metrics: {}
  date: 2021-10-27_11-54-57
  done: false
  episode_len_mean: 14.27
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 6.8098
  episode_reward_min: 0.4300000000000044
  episodes_this_iter: 68
  episodes_total: 3579
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.8489180492030249
          entropy_coeff: 0.009999999999999998
          kl: 0.01757746285561102
          policy_loss: 0.02965494067304664
          total_loss: 0.21583045263671213
          vf_explained_var: 0.9727251529693604
          vf_loss: 0.19014526986413532
    num_agent_steps_sampled: 763000
    num_agent_steps_trained: 763000
    num_steps_sampled: 763000
    num_steps_trained: 763000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,763,53109.3,763000,6.8098,9.94,0.43,14.27




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 764000
  custom_metrics: {}
  date: 2021-10-27_12-19-10
  done: false
  episode_len_mean: 13.38
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 7.140499999999999
  episode_reward_min: 3.83
  episodes_this_iter: 75
  episodes_total: 3654
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.6164531545506583
          entropy_coeff: 0.009999999999999998
          kl: 0.006433831452506884
          policy_loss: 0.0635380859590239
          total_loss: 0.1851467865208785
          vf_explained_var: 0.9805253744125366
          vf_loss: 0.12611899822950362
    num_agent_steps_sampled: 764000
    num_agent_steps_trained: 764000
    num_steps_sampled: 764000
    num_steps_trained: 764000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,764,54562.2,764000,7.1405,9.94,3.83,13.38




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 765000
  custom_metrics: {}
  date: 2021-10-27_12-41-27
  done: false
  episode_len_mean: 14.27
  episode_media: {}
  episode_reward_max: 9.93
  episode_reward_mean: 6.912599999999998
  episode_reward_min: 3.8200000000000003
  episodes_this_iter: 68
  episodes_total: 3722
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.8132523953914642
          entropy_coeff: 0.009999999999999998
          kl: 0.011403479508872567
          policy_loss: -0.05740652937028143
          total_loss: 0.14232147940331036
          vf_explained_var: 0.9609628319740295
          vf_loss: 0.20492853191163804
    num_agent_steps_sampled: 765000
    num_agent_steps_trained: 765000
    num_steps_sampled: 765000
    num_steps_trained: 765000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,765,55899.7,765000,6.9126,9.93,3.82,14.27




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 766000
  custom_metrics: {}
  date: 2021-10-27_13-06-31
  done: false
  episode_len_mean: 13.61
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 7.007299999999998
  episode_reward_min: 4.770000000000001
  episodes_this_iter: 77
  episodes_total: 3799
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.6670134835773044
          entropy_coeff: 0.009999999999999998
          kl: 0.007790741306554239
          policy_loss: 0.006299662672811084
          total_loss: 0.14109856221410963
          vf_explained_var: 0.9762188196182251
          vf_loss: 0.13946592124799886
    num_agent_steps_sampled: 766000
    num_agent_steps_trained: 766000
    num_steps_sampled: 766000
    num_steps_trained: 766000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,766,57403.9,766000,7.0073,9.94,4.77,13.61




Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 767000
  custom_metrics: {}
  date: 2021-10-27_13-22-09
  done: false
  episode_len_mean: 16.88
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 7.186099999999999
  episode_reward_min: 2.520000000000012
  episodes_this_iter: 48
  episodes_total: 3847
  experiment_id: f127dc46046f4004934563dcc31441c2
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.6788589000701905
          entropy_coeff: 0.009999999999999998
          kl: 0.00937454644690868
          policy_loss: 0.07940566167235374
          total_loss: 0.3916327837440703
          vf_explained_var: 0.9504621624946594
          vf_loss: 0.3166053710712327
    num_agent_steps_sampled: 767000
    num_agent_steps_trained: 767000
    num_steps_sampled: 767000
    num_steps_trained: 767000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,RUNNING,192.168.3.5:206,767,58341.9,767000,7.1861,9.94,2.52,16.88


2021-10-27 13:26:31,013	ERROR trial_runner.py:773 -- Trial PPO_my_env_f560f_00000: Error processing event.
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 739, in _process_trial
    results = self.trial_executor.fetch_result(trial)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 746, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/_private/client_mode_hook.py", line 82, in wrapper
    return func(*args, **kwargs)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/worker.py", line 1621, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(timeout): [36mray::PPO.train()[39m (pid=206, ip=192.168.3.5, repr=PPO)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 648, in train
   

Result for PPO_my_env_f560f_00000:
  agent_timesteps_total: 767000
  custom_metrics: {}
  date: 2021-10-27_13-22-09
  done: false
  episode_len_mean: 16.88
  episode_media: {}
  episode_reward_max: 9.940000000000001
  episode_reward_mean: 7.186099999999999
  episode_reward_min: 2.520000000000012
  episodes_this_iter: 48
  episodes_total: 3847
  experiment_id: f127dc46046f4004934563dcc31441c2
  experiment_tag: '0'
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25711478093462586
          cur_lr: 5.000000000000001e-05
          entropy: 0.6788589000701905
          entropy_coeff: 0.009999999999999998
          kl: 0.00937454644690868
          policy_loss: 0.07940566167235374
          total_loss: 0.3916327837440703
          vf_explained_var: 0.9504621624946594
          vf_loss: 0.3166053710712327
    num_agent_steps_sampled: 767000
    num_agent_steps_trained: 767000
    num_steps_sampled: 767000
    num_step

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,ERROR,,767,58341.9,767000,7.1861,9.94,2.52,16.88

Trial name,# failures,error file
PPO_my_env_f560f_00000,1,/root/ray_results/PPO_2021-10-26_21-09-08/PPO_my_env_f560f_00000_0_2021-10-26_21-09-08/error.txt


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_f560f_00000,ERROR,,767,58341.9,767000,7.1861,9.94,2.52,16.88

Trial name,# failures,error file
PPO_my_env_f560f_00000,1,/root/ray_results/PPO_2021-10-26_21-09-08/PPO_my_env_f560f_00000_0_2021-10-26_21-09-08/error.txt


TuneError: ('Trials did not complete', [PPO_my_env_f560f_00000])

2021-10-28 14:45:24,660	ERROR worker.py:475 -- print_logs: Connection closed by server.
2021-10-28 14:45:24,661	ERROR worker.py:1217 -- listen_error_messages_raylet: Connection closed by server.
2021-10-28 14:45:24,661	ERROR import_thread.py:88 -- ImportThread: Connection closed by server.
