In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that returns one flat feature vector per sample.

    Five ``Conv2d`` + ``ReLU`` stages (no padding) followed by ``nn.Flatten``.
    For a 3x64x64 input the spatial size shrinks 64 -> 15 -> 6 -> 4 -> 2 -> 1,
    so the output has 512 features per sample.
    """

    # (in_channels, out_channels, kernel_size, stride) of each conv stage, in order.
    _CONV_SPECS = (
        (3, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 64, 3, 1),
        (64, 64, 3, 1),
        (64, 512, 2, 1),
    )

    def __init__(self):
        super().__init__()

        stages = []
        for in_ch, out_ch, kernel, stride in self._CONV_SPECS:
            stages.append(
                nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride, padding=0)
            )
            stages.append(nn.ReLU())
        stages.append(nn.Flatten())

        # The attribute name `cnn` and the layer order determine the state-dict
        # keys (cnn.0.weight, cnn.2.weight, ...), so both are kept fixed.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the conv stack to ``x`` and return the flattened activations."""
        return self.cnn(x)

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model with a pretrained visual encoder and a target-grid encoder.

    ``obs['pov']`` is encoded by ``VisualEncoder``; ``obs['target_grid']`` is
    one-hot encoded into ``NUM_TARGET_CLASSES`` channels and reduced to a single
    channel by a 1x1x1 ``Conv3d``. The two feature vectors are concatenated and
    passed through a shared MLP that feeds separate action and value heads.
    """

    # Number of one-hot classes for target-grid cells (grid values 0..6).
    NUM_TARGET_CLASSES = 7

    def __init__(self, obs_space, action_space, num_outputs, model_config, name,
                 encoder_weights_path="/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth"):
        """Build the networks and load the pretrained visual-encoder weights.

        Args:
            obs_space, action_space, num_outputs, model_config, name: forwarded
                unchanged to ``TorchModelV2.__init__``; ``action_space.n`` sizes
                the action head.
            encoder_weights_path: path of the ``VisualEncoder`` state dict.
                Defaults to the previously hard-coded location. It can be
                overridden via the trainer's ``custom_model_config``, whose
                entries are passed to this constructor as extra kwargs.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        visual_features_dim = 512          # output width of VisualEncoder
        target_features_dim = 9 * 11 * 11  # flattened single-channel target grid

        self.visual_encoder = VisualEncoder()
        # Load on CPU first; the model is moved to the GPU below if one is available.
        self.visual_encoder.load_state_dict(
            torch.load(encoder_weights_path, map_location=torch.device('cpu'))
        )

        # Per-cell projection of the one-hot target grid down to one channel.
        self.target_encoder = nn.Sequential(
            nn.Conv3d(self.NUM_TARGET_CLASSES, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )

        policy_hidden_dim = 256
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU()
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate of the most recent forward() call, read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            # Moves every registered sub-module (encoders, MLP, both heads).
            self.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: must contain ``'obs'`` with tensors ``'pov'`` (permuted
                here from channels-last to channels-first and scaled by 1/255)
                and ``'target_grid'`` (integer-valued class ids per cell).
            state: returned unchanged.
            seq_lens: unused.

        Returns:
            ``(logits, state)`` where ``logits`` comes from the action head.
        """
        obs = input_dict['obs']

        # Tensor.cuda()/.to() return a new tensor rather than moving in place,
        # so the result has to be kept. Using the weights' device keeps inputs
        # and parameters together wherever the model has been placed.
        device = self.action_head.weight.device

        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        target_ids = obs['target_grid'].to(device).long()
        # (B, D, H, W) ids -> (B, D, H, W, C) one-hot -> (B, C, D, H, W) for Conv3d.
        target = one_hot(target_ids, num_classes=self.NUM_TARGET_CLASSES)
        target = target.permute(0, 4, 1, 2, 3).float()

        visual_features = self.visual_encoder(pov)
        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)

        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)

        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the latest ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register the model under the name that the trainer config's "custom_model" entry refers to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing ``pov``, ``inventory``, ``compass`` and,
    optionally, ``target_grid``.

    Args:
        env: environment to wrap.
        include_target: when true, ``'target_grid'`` is declared in
            ``observation_space`` and included in every observation.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        # Remembered so observation() emits exactly the keys declared below.
        self.include_target = include_target
        self.observation_space = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            self.observation_space['target_grid'] = \
                gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(self.observation_space)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict declared by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` when present (the
        key is then removed from ``info``); otherwise it is read from the
        unwrapped env's current task. If ``info`` is given but lacks the key,
        the situation is logged as an error and ``should_reset(True)`` is called
        on the unwrapped env when that method exists.

        ``reward`` and ``done`` are accepted but not used here.
        """
        if info is not None:
            if 'target_grid' in info:
                target_grid = info['target_grid']
                del info['target_grid']
            else:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
                target_grid = self.env.unwrapped.tasks.current.target_grid
        else:
            target_grid = self.env.unwrapped.tasks.current.target_grid

        observation = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Only emit the key when the observation space declares it; otherwise
        # the returned dict would not match `observation_space`.
        if self.include_target:
            observation['target_grid'] = target_grid
        return observation

In [6]:
import os
# Order GPUs by PCI bus id and make only device 0 visible to this process (see issue #152).
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES="0",
)

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that turns a reward of exactly zero into -0.01.

    Any non-zero reward is passed through unchanged.
    """

    def reward(self, rew):
        """Return -0.01 when ``rew`` equals 0, otherwise ``rew`` itself."""
        return -0.01 if rew == 0 else rew
    
def env_creator(env_config):
    """Build the wrapped 'IGLUSilentBuilder-v0' environment used for training.

    Args:
        env_config: optional mapping of settings. Recognised keys:
            ``'max_steps'`` (default 1000), passed to ``gym.make``, and
            ``'task_preset'`` (default ``['C32']``), passed to ``TaskSet``.
            ``None`` or an empty mapping reproduces the defaults.

    Returns:
        The environment wrapped, innermost to outermost, in
        ``VisualObservationWrapper`` (with target grid), ``SelectAndPlace``,
        ``Discretization`` and ``RewardWrapper``.
    """
    settings = env_config or {}
    max_steps = settings.get('max_steps', 1000)
    task_preset = settings.get('task_preset', ['C32'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    # Wrapper order matters: each wrapper sees the output of the one before it.
    env = VisualObservationWrapper(env, include_target=True)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    env = RewardWrapper(env)
    return env

from ray.tune.registry import register_env
# Register env_creator under the name that the trainer config's "env" entry refers to.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [7]:
from ray.tune.integration.wandb import WandbLogger

# Weights & Biases settings, placed under "logger_config" -> "wandb" in the trainer config.
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO MultiTask (C32) pretrained (AnnaCNN) (3 noops after placement) r: -0.01",
}

# Model section of the trainer config.
model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# PPO trainer configuration; keys not listed here keep the trainer's defaults.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    # "gamma": 0.99,  # not set; the trainer default applies
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# Launch training; no stop criterion is given in this call.
analysis = tune.run(
    PPOTrainer,
    config=ppo_config,
    loggers=[WandbLogger],
)

2021-10-21 19:06:50,002	INFO wandb.py:170 -- Already logged into W&B.
2021-10-21 19:06:50,018	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_0b57e_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=11962)[0m 2021-10-21 19:06:53,424	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=11962)[0m 2021-10-21 19:06:53,424	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-21_19-07-57
  done: false
  episode_len_mean: 386.0
  episode_media: {}
  episode_reward_max: -3.699999999999965
  episode_reward_mean: -3.859999999999962
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8657582283020018
          entropy_coeff: 0.009999999999999998
          kl: 0.011295225388316114
          policy_loss: 0.02128616985347536
          total_loss: -0.0028004381391737196
          vf_explained_var: 0.027744241058826447
          vf_loss: 0.002311929318238981
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,1,58.1575,1000,-3.86,-3.7,-4.02,386


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-21_19-08-19
  done: false
  episode_len_mean: 395.8
  episode_media: {}
  episode_reward_max: -3.699999999999965
  episode_reward_mean: -4.257999999999958
  episode_reward_min: -5.359999999999953
  episodes_this_iter: 3
  episodes_total: 5
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.834957716200087
          entropy_coeff: 0.009999999999999998
          kl: 0.011457809688480362
          policy_loss: -0.01814782197276751
          total_loss: 0.30453668755168717
          vf_explained_var: 0.04040609300136566
          vf_loss: 0.34874251517467203
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,2,80.508,2000,-4.258,-3.7,-5.36,395.8


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-21_19-08-40
  done: false
  episode_len_mean: 401.0
  episode_media: {}
  episode_reward_max: -3.699999999999965
  episode_reward_mean: -4.224285714285672
  episode_reward_min: -5.359999999999953
  episodes_this_iter: 2
  episodes_total: 7
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8349191109339396
          entropy_coeff: 0.009999999999999998
          kl: 0.015375480082855691
          policy_loss: -0.06303459405899048
          total_loss: -0.0801603024204572
          vf_explained_var: 0.4462907612323761
          vf_loss: 0.00814838793853091
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,3,100.867,3000,-4.22429,-3.7,-5.36,401


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-21_19-09-01
  done: false
  episode_len_mean: 402.77777777777777
  episode_media: {}
  episode_reward_max: -3.699999999999965
  episode_reward_mean: -4.194444444444403
  episode_reward_min: -5.359999999999953
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8013875934812758
          entropy_coeff: 0.009999999999999998
          kl: 0.010863796409611836
          policy_loss: -0.26396643395225206
          total_loss: -0.03987264268928104
          vf_explained_var: -0.14763697981834412
          vf_loss: 0.2499349045360254
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,4,121.897,4000,-4.19444,-3.7,-5.36,402.778


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-21_19-09-23
  done: false
  episode_len_mean: 397.1666666666667
  episode_media: {}
  episode_reward_max: -3.4099999999999713
  episode_reward_mean: -6.288333333333313
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 12
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7516336679458617
          entropy_coeff: 0.009999999999999998
          kl: 0.011317950294766217
          policy_loss: -0.006284104453192817
          total_loss: 0.023489785194396973
          vf_explained_var: 0.458140105009079
          vf_loss: 0.05502663639684518
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,5,144.452,5000,-6.28833,-3.41,-30.35,397.167


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-21_19-09-46
  done: false
  episode_len_mean: 391.3333333333333
  episode_media: {}
  episode_reward_max: -3.4099999999999713
  episode_reward_mean: -5.766666666666643
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 15
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.784320574336582
          entropy_coeff: 0.009999999999999998
          kl: 0.006212926283664485
          policy_loss: -0.18536202195617887
          total_loss: -0.20387611132529046
          vf_explained_var: -0.06855765730142593
          vf_loss: 0.008086532294853694
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,6,166.664,6000,-5.76667,-3.41,-30.35,391.333


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-21_19-10-09
  done: false
  episode_len_mean: 387.72222222222223
  episode_media: {}
  episode_reward_max: -3.4099999999999713
  episode_reward_mean: -5.729444444444417
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 18
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7888890716764663
          entropy_coeff: 0.009999999999999998
          kl: 0.013232911376316705
          policy_loss: -0.0374074989101953
          total_loss: 0.24142726535598438
          vf_explained_var: -0.06325794011354446
          vf_loss: 0.304077071745673
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,7,190.138,7000,-5.72944,-3.41,-30.35,387.722


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-21_19-10-33
  done: false
  episode_len_mean: 385.4
  episode_media: {}
  episode_reward_max: -3.4099999999999713
  episode_reward_mean: -5.520999999999972
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 20
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7793382167816163
          entropy_coeff: 0.009999999999999998
          kl: 0.005806164545941823
          policy_loss: -0.20410702973604203
          total_loss: -0.2224707822004954
          vf_explained_var: -0.0941402018070221
          vf_loss: 0.008268397745107197
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,8,213.559,8000,-5.521,-3.41,-30.35,385.4


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-21_19-10-56
  done: false
  episode_len_mean: 380.2173913043478
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -5.828260869565191
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 23
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7474695761998493
          entropy_coeff: 0.009999999999999998
          kl: 0.009769350681929407
          policy_loss: -0.0042065509491496615
          total_loss: 0.194901561902629
          vf_explained_var: 0.4173086881637573
          vf_loss: 0.22462893807225756
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,9,237.349,9000,-5.82826,-2.24,-30.35,380.217


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-21_19-11-21
  done: false
  episode_len_mean: 378.11538461538464
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -5.573461538461511
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 26
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7589119328392875
          entropy_coeff: 0.009999999999999998
          kl: 0.013169888188921538
          policy_loss: -0.014173806541495852
          total_loss: -0.028068392806582982
          vf_explained_var: 0.2169017195701599
          vf_loss: 0.011060554403552993
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,10,261.462,10000,-5.57346,-2.24,-30.35,378.115


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-21_19-11-43
  done: false
  episode_len_mean: 376.13793103448273
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -5.368275862068938
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 29
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7167169279522367
          entropy_coeff: 0.009999999999999998
          kl: 0.013190728268114743
          policy_loss: -0.007356235136588414
          total_loss: -0.020772479308976067
          vf_explained_var: 0.2638947665691376
          vf_loss: 0.011112776365027659
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,11,284.007,11000,-5.36828,-2.24,-30.35,376.138




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-21_19-12-26
  done: false
  episode_len_mean: 372.4375
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -5.1806249999999725
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 32
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7508882549073963
          entropy_coeff: 0.009999999999999998
          kl: 0.01112151950361359
          policy_loss: 0.01755707586805026
          total_loss: 0.0005017403927114275
          vf_explained_var: -0.026238860562443733
          vf_loss: 0.00822924212164556
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,12,326.992,12000,-5.18062,-2.24,-30.35,372.438


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-21_19-12-50
  done: false
  episode_len_mean: 369.8285714285714
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -5.029714285714258
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 35
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7373414119084676
          entropy_coeff: 0.009999999999999998
          kl: 0.014596404035734839
          policy_loss: 0.02901813056733873
          total_loss: 0.02188054554992252
          vf_explained_var: 0.06825028359889984
          vf_loss: 0.017316550846832493
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,13,351.104,13000,-5.02971,-2.24,-30.35,369.829


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-21_19-13-13
  done: false
  episode_len_mean: 369.43243243243245
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -4.953783783783755
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 37
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7310907363891603
          entropy_coeff: 0.009999999999999998
          kl: 0.011079240114774336
          policy_loss: -0.0974456207619773
          total_loss: -0.10927026917537054
          vf_explained_var: 0.16954566538333893
          vf_loss: 0.013270414553375708
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,14,373.741,14000,-4.95378,-2.24,-30.35,369.432


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-21_19-13-37
  done: false
  episode_len_mean: 368.325
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -4.848249999999972
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 40
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.699818393919203
          entropy_coeff: 0.009999999999999998
          kl: 0.013693973132897478
          policy_loss: -0.162065526843071
          total_loss: -0.17333738638295068
          vf_explained_var: 0.44436919689178467
          vf_loss: 0.01298752908801867
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,15,398.332,15000,-4.84825,-2.24,-30.35,368.325


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-21_19-14-03
  done: false
  episode_len_mean: 365.0232558139535
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -4.733953488372065
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 43
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6675016005833943
          entropy_coeff: 0.009999999999999998
          kl: 0.010699830811674834
          policy_loss: -0.14293558282984628
          total_loss: -0.15958038394649823
          vf_explained_var: 0.7496100664138794
          vf_loss: 0.0078902478937784
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,16,424.125,16000,-4.73395,-2.24,-30.35,365.023


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-21_19-14-27
  done: false
  episode_len_mean: 363.2391304347826
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -4.6454347826086675
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 46
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6294925848642987
          entropy_coeff: 0.009999999999999998
          kl: 0.016362323851184878
          policy_loss: -0.09309206696020232
          total_loss: -0.11044523179945019
          vf_explained_var: 0.8469886183738708
          vf_loss: 0.005669295063449277
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,17,447.872,17000,-4.64543,-2.24,-30.35,363.239


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-21_19-14-53
  done: false
  episode_len_mean: 359.7
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -4.528999999999972
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 4
  episodes_total: 50
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6244274271859065
          entropy_coeff: 0.009999999999999998
          kl: 0.012034335356499228
          policy_loss: -0.05704166210359997
          total_loss: -0.07224840819835662
          vf_explained_var: 0.7505043745040894
          vf_loss: 0.008630661361126436
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,18,473.353,18000,-4.529,-2.24,-30.35,359.7


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-21_19-15-17
  done: false
  episode_len_mean: 357.47169811320754
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -4.453962264150916
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 53
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.611643279923333
          entropy_coeff: 0.009999999999999998
          kl: 0.018196247434142974
          policy_loss: -0.033908995075358286
          total_loss: -0.051508699274725386
          vf_explained_var: 0.8948479294776917
          vf_loss: 0.004877480028274779
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,19,498.266,19000,-4.45396,-2.24,-30.35,357.472


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-21_19-15-41
  done: false
  episode_len_mean: 357.9818181818182
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -4.427090909090881
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 55
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5799973752763536
          entropy_coeff: 0.009999999999999998
          kl: 0.017503558094095636
          policy_loss: -0.18049137517809868
          total_loss: -0.19565252603756056
          vf_explained_var: 0.7516571879386902
          vf_loss: 0.007138114137988951
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,20,521.305,20000,-4.42709,-2.24,-30.35,357.982


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-21_19-16-06
  done: false
  episode_len_mean: 357.01724137931035
  episode_media: {}
  episode_reward_max: -2.2399999999999856
  episode_reward_mean: -4.373620689655144
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 58
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.558344923125373
          entropy_coeff: 0.009999999999999998
          kl: 0.01770878237948765
          policy_loss: -0.1374739609244797
          total_loss: -0.15500223698715368
          vf_explained_var: 0.8068132996559143
          vf_loss: 0.004513414998331832
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,21,546.777,21000,-4.37362,-2.24,-30.35,357.017




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-21_19-16-49
  done: false
  episode_len_mean: 354.53225806451616
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.247741935483844
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 4
  episodes_total: 62
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.595480513572693
          entropy_coeff: 0.009999999999999998
          kl: 0.013355610343311891
          policy_loss: -0.10502943644920985
          total_loss: -0.08625720971160465
          vf_explained_var: 0.6515188217163086
          vf_loss: 0.0420559110171679
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,22,590.192,22000,-4.24774,-0.31,-30.35,354.532


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-21_19-17-13
  done: false
  episode_len_mean: 354.078125
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.221249999999972
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 64
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5028707371817696
          entropy_coeff: 0.009999999999999998
          kl: 0.009894294171934798
          policy_loss: -0.07479286359416114
          total_loss: -0.09234633867939314
          vf_explained_var: 0.7045983076095581
          vf_loss: 0.005496374322360174
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,23,613.337,23000,-4.22125,-0.31,-30.35,354.078


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-21_19-17-35
  done: false
  episode_len_mean: 355.13432835820896
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.289402985074599
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 67
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5435376061333552
          entropy_coeff: 0.009999999999999998
          kl: 0.011915845363487589
          policy_loss: 0.043648906383249496
          total_loss: 0.23171312092906898
          vf_explained_var: 0.2352427989244461
          vf_loss: 0.21111642270245487
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,24,636.167,24000,-4.2894,-0.31,-30.35,355.134


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-21_19-18-00
  done: false
  episode_len_mean: 354.8857142857143
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.255285714285686
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 70
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4670934677124023
          entropy_coeff: 0.009999999999999998
          kl: 0.014185566255452765
          policy_loss: -0.05962835806939337
          total_loss: -0.071238055659665
          vf_explained_var: 0.8122956156730652
          vf_loss: 0.010224123920003573
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,25,660.775,25000,-4.25529,-0.31,-30.35,354.886


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-21_19-18-24
  done: false
  episode_len_mean: 354.8767123287671
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.265890410958876
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 73
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.509858504931132
          entropy_coeff: 0.009999999999999998
          kl: 0.010216447214029895
          policy_loss: -0.06794710755348206
          total_loss: -0.06402433953351444
          vf_explained_var: 0.3501269817352295
          vf_loss: 0.02697806451987061
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,26,684.486,26000,-4.26589,-0.31,-30.35,354.877


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-21_19-18-49
  done: false
  episode_len_mean: 354.9605263157895
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.2384210526315504
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 76
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4988795359929403
          entropy_coeff: 0.009999999999999998
          kl: 0.011778774979066186
          policy_loss: -0.03491289644605584
          total_loss: -0.05223692013985581
          vf_explained_var: 0.7818740606307983
          vf_loss: 0.005309017434612744
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,27,709.287,27000,-4.23842,-0.31,-30.35,354.961


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-21_19-19-12
  done: false
  episode_len_mean: 354.5128205128205
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.216282051282023
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 78
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4680516137017143
          entropy_coeff: 0.009999999999999998
          kl: 0.018767075395102178
          policy_loss: -0.13482291052738826
          total_loss: -0.14986090146832995
          vf_explained_var: 0.6926149725914001
          vf_loss: 0.00588911201339215
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,28,732.813,28000,-4.21628,-0.31,-30.35,354.513


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-21_19-19-38
  done: false
  episode_len_mean: 353.6341463414634
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.174756097560947
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 4
  episodes_total: 82
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.359245038032532
          entropy_coeff: 0.009999999999999998
          kl: 0.012098480223652943
          policy_loss: -0.08322449955675337
          total_loss: -0.09764446318149567
          vf_explained_var: 0.7542027235031128
          vf_loss: 0.006752788177173999
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,29,758.999,29000,-4.17476,-0.31,-30.35,353.634


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-21_19-20-03
  done: false
  episode_len_mean: 353.9047619047619
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.162261904761876
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 84
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.449101177851359
          entropy_coeff: 0.009999999999999998
          kl: 0.016135060573323954
          policy_loss: -0.0029557673467530145
          total_loss: -0.021469786018133163
          vf_explained_var: 0.9237102270126343
          vf_loss: 0.002749980036686692
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,30,783.457,30000,-4.16226,-0.31,-30.35,353.905


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-21_19-20-27
  done: false
  episode_len_mean: 353.3333333333333
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.135057471264339
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 87
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.394706222746107
          entropy_coeff: 0.009999999999999998
          kl: 0.012615684133375677
          policy_loss: -0.08365947736634148
          total_loss: -0.10075405554638969
          vf_explained_var: 0.8578882217407227
          vf_loss: 0.004329344477607972
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,31,807.951,31000,-4.13506,-0.31,-30.35,353.333




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-21_19-21-08
  done: false
  episode_len_mean: 353.6666666666667
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.118333333333305
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 90
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4541764656702676
          entropy_coeff: 0.009999999999999998
          kl: 0.019330933148889944
          policy_loss: 0.054230979540281825
          total_loss: 0.037253228947520256
          vf_explained_var: 0.8804128170013428
          vf_loss: 0.003697824929582162
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,32,848.026,32000,-4.11833,-0.31,-30.35,353.667


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-21_19-21-33
  done: false
  episode_len_mean: 354.2043010752688
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.10494623655911
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 93
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.429580497741699
          entropy_coeff: 0.009999999999999998
          kl: 0.012962746333017637
          policy_loss: 0.009590322151780128
          total_loss: -0.007586473971605301
          vf_explained_var: 0.7449100017547607
          vf_loss: 0.0045264594664331526
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,33,873.728,33000,-4.10495,-0.31,-30.35,354.204


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-21_19-21-55
  done: false
  episode_len_mean: 354.8421052631579
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.099473684210497
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 95
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4045631700091894
          entropy_coeff: 0.009999999999999998
          kl: 0.012189056063088089
          policy_loss: -0.11888370878166622
          total_loss: -0.1358645257850488
          vf_explained_var: 0.8543549180030823
          vf_loss: 0.004627000807603407
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,34,895.773,34000,-4.09947,-0.31,-30.35,354.842


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-21_19-22-16
  done: false
  episode_len_mean: 356.53061224489795
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.099489795918338
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 98
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.409894824028015
          entropy_coeff: 0.009999999999999998
          kl: 0.0145439934413892
          policy_loss: -0.03977619724141227
          total_loss: -0.05576403786738714
          vf_explained_var: 0.7290173768997192
          vf_loss: 0.0052023021633633305
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,35,916.106,35000,-4.09949,-0.31,-30.35,356.531


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-21_19-22-35
  done: false
  episode_len_mean: 358.04
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.10389999999997
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 100
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.393940464655558
          entropy_coeff: 0.009999999999999998
          kl: 0.012290325742053227
          policy_loss: 0.06612243155638377
          total_loss: 0.04911440759897232
          vf_explained_var: 0.38442447781562805
          vf_loss: 0.0044733094588284275
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,36,935.903,36000,-4.1039,-0.31,-30.35,358.04


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-21_19-22-55
  done: false
  episode_len_mean: 358.48
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.108299999999969
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 102
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3802956422170003
          entropy_coeff: 0.009999999999999998
          kl: 0.016276576555315246
          policy_loss: -0.15758133083581924
          total_loss: -0.17333394951290554
          vf_explained_var: 0.6834703683853149
          vf_loss: 0.004795018858082282
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,37,955.696,37000,-4.1083,-0.31,-30.35,358.48


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-21_19-23-15
  done: false
  episode_len_mean: 359.44
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.10289999999997
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 3
  episodes_total: 105
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3723293860753376
          entropy_coeff: 0.009999999999999998
          kl: 0.01514372777350178
          policy_loss: -0.01580399920543035
          total_loss: -0.029669371743996937
          vf_explained_var: 0.3043735921382904
          vf_loss: 0.0068291767086419795
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,38,975.625,38000,-4.1029,-0.31,-30.35,359.44


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-21_19-23-35
  done: false
  episode_len_mean: 359.42
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.102699999999969
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 107
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.356879120402866
          entropy_coeff: 0.009999999999999998
          kl: 0.012658020611351913
          policy_loss: -0.05009599228700002
          total_loss: -0.06352692693471909
          vf_explained_var: 0.5957118272781372
          vf_loss: 0.007606246171053499
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,39,995.117,39000,-4.1027,-0.31,-30.35,359.42


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-21_19-23-54
  done: false
  episode_len_mean: 359.75
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -4.10599999999997
  episode_reward_min: -30.350000000000193
  episodes_this_iter: 2
  episodes_total: 109
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.326289510726929
          entropy_coeff: 0.009999999999999998
          kl: 0.012081803115909897
          policy_loss: -0.1178959290186564
          total_loss: -0.130240402619044
          vf_explained_var: 0.3549363613128662
          vf_loss: 0.00850205537216324
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,40,1014.39,40000,-4.106,-0.31,-30.35,359.75


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-21_19-24-14
  done: false
  episode_len_mean: 360.86
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.8540999999999666
  episode_reward_min: -11.289999999999973
  episodes_this_iter: 3
  episodes_total: 112
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.307250973913405
          entropy_coeff: 0.009999999999999998
          kl: 0.010911622580891962
          policy_loss: 0.023840767476293774
          total_loss: 0.011434699098269144
          vf_explained_var: 0.5740939974784851
          vf_loss: 0.008484115827983866
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,41,1034.25,41000,-3.8541,-0.31,-11.29,360.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-21_19-24-35
  done: false
  episode_len_mean: 361.66
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.862099999999967
  episode_reward_min: -11.289999999999973
  episodes_this_iter: 2
  episodes_total: 114
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.319406541188558
          entropy_coeff: 0.009999999999999998
          kl: 0.012451853551461294
          policy_loss: -0.11745893723434872
          total_loss: -0.12949712740050423
          vf_explained_var: 0.6026422381401062
          vf_loss: 0.008665504421676613
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,42,1055.41,42000,-3.8621,-0.31,-11.29,361.66


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-21_19-24-56
  done: false
  episode_len_mean: 362.1
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.8110999999999664
  episode_reward_min: -11.289999999999973
  episodes_this_iter: 3
  episodes_total: 117
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.271255368656582
          entropy_coeff: 0.009999999999999998
          kl: 0.008666866915657954
          policy_loss: 0.05737675055861473
          total_loss: 0.04537973205248515
          vf_explained_var: 0.4399288594722748
          vf_loss: 0.008982159820799198
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,43,1075.88,43000,-3.8111,-0.31,-11.29,362.1


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-21_19-25-16
  done: false
  episode_len_mean: 363.17
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.8217999999999663
  episode_reward_min: -11.289999999999973
  episodes_this_iter: 2
  episodes_total: 119
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.257058220439487
          entropy_coeff: 0.009999999999999998
          kl: 0.01299609386735039
          policy_loss: -0.11695978724294238
          total_loss: -0.12725812097390493
          vf_explained_var: 0.49232977628707886
          vf_loss: 0.009673030470083985
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,44,1096.28,44000,-3.8218,-0.31,-11.29,363.17




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-21_19-25-56
  done: false
  episode_len_mean: 363.74
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.7722999999999667
  episode_reward_min: -11.289999999999973
  episodes_this_iter: 3
  episodes_total: 122
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1923533545600042
          entropy_coeff: 0.009999999999999998
          kl: 0.01211721544751666
          policy_loss: -0.07909980416297913
          total_loss: -0.08413823520143827
          vf_explained_var: 0.32320061326026917
          vf_loss: 0.014461655666430791
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 4500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,45,1136.35,45000,-3.7723,-0.31,-11.29,363.74


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-21_19-26-18
  done: false
  episode_len_mean: 364.16
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.6990999999999667
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 125
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1948877493540446
          entropy_coeff: 0.009999999999999998
          kl: 0.010199339058834678
          policy_loss: 0.06583171412348747
          total_loss: 0.05624578007393413
          vf_explained_var: 0.5068028569221497
          vf_loss: 0.010323072144658201
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,46,1158.38,46000,-3.6991,-0.31,-9.64,364.16


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-21_19-26-41
  done: false
  episode_len_mean: 364.02
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.6976999999999665
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 128
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1475623899035985
          entropy_coeff: 0.009999999999999998
          kl: 0.008415855648194966
          policy_loss: 0.012890707453091938
          total_loss: 0.004590056836605072
          vf_explained_var: 0.44939982891082764
          vf_loss: 0.01149180284830638
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,47,1180.93,47000,-3.6977,-0.31,-9.64,364.02


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-21_19-27-04
  done: false
  episode_len_mean: 364.28
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.7002999999999666
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 2
  episodes_total: 130
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1225398778915405
          entropy_coeff: 0.009999999999999998
          kl: 0.01001393439847755
          policy_loss: -0.15209758977095286
          total_loss: -0.1598799863623248
          vf_explained_var: 0.42644399404525757
          vf_loss: 0.011440210534621858
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,48,1203.75,48000,-3.7003,-0.31,-9.64,364.28


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-21_19-27-25
  done: false
  episode_len_mean: 364.79
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.7053999999999663
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 133
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.992095586988661
          entropy_coeff: 0.009999999999999998
          kl: 0.010923694023524928
          policy_loss: -0.1028696283698082
          total_loss: -0.10825804935561287
          vf_explained_var: 0.41333287954330444
          vf_loss: 0.012347789926247464
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,49,1225.56,49000,-3.7054,-0.31,-9.64,364.79


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-21_19-27-48
  done: false
  episode_len_mean: 365.0
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.7074999999999663
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 136
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.108651857905918
          entropy_coeff: 0.009999999999999998
          kl: 0.012297173409767579
          policy_loss: 0.010052080949147542
          total_loss: 0.0023709690405262843
          vf_explained_var: 0.33697646856307983
          vf_loss: 0.010945972502748999
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,50,1247.72,50000,-3.7075,-0.31,-9.64,365


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-21_19-28-09
  done: false
  episode_len_mean: 365.26
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.7100999999999664
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 139
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0598877986272175
          entropy_coeff: 0.009999999999999998
          kl: 0.012585803507369498
          policy_loss: 0.023948162297407784
          total_loss: 0.01760787078075939
          vf_explained_var: 0.27037858963012695
          vf_loss: 0.011741425221165022
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,51,1268.73,51000,-3.7101,-0.31,-9.64,365.26


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-21_19-28-30
  done: false
  episode_len_mean: 366.18
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.719299999999966
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 2
  episodes_total: 141
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0100540227360195
          entropy_coeff: 0.009999999999999998
          kl: 0.01123721669620544
          policy_loss: -0.11726658013131884
          total_loss: -0.1242384456925922
          vf_explained_var: 0.16120211780071259
          vf_loss: 0.010881230581112
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,52,1289.8,52000,-3.7193,-0.31,-9.64,366.18


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-21_19-28-52
  done: false
  episode_len_mean: 367.64
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.733899999999965
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 144
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9680156813727485
          entropy_coeff: 0.009999999999999998
          kl: 0.009928170891450818
          policy_loss: -0.00345260136657291
          total_loss: -0.009542571587695016
          vf_explained_var: 0.2288280874490738
          vf_loss: 0.011604552591840426
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,53,1311.88,53000,-3.7339,-0.31,-9.64,367.64


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-21_19-29-15
  done: false
  episode_len_mean: 367.84
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.735899999999965
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 147
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9882067388958402
          entropy_coeff: 0.009999999999999998
          kl: 0.008255381422357072
          policy_loss: 0.01999501221709781
          total_loss: 0.013708692044019699
          vf_explained_var: -0.10807877779006958
          vf_loss: 0.011944672148416026
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,54,1335.5,54000,-3.7359,-0.31,-9.64,367.84




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-21_19-29-56
  done: false
  episode_len_mean: 367.7
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.734499999999966
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 150
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9181897322336832
          entropy_coeff: 0.009999999999999998
          kl: 0.01480288802160256
          policy_loss: -0.10791927932037247
          total_loss: -0.10893536797828145
          vf_explained_var: 0.23004089295864105
          vf_loss: 0.015205232002254989
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,55,1376.54,55000,-3.7345,-0.31,-9.64,367.7


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-21_19-30-23
  done: false
  episode_len_mean: 367.56
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.7330999999999652
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 153
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9338653829362658
          entropy_coeff: 0.009999999999999998
          kl: 0.012931630082598507
          policy_loss: -0.14561938982870842
          total_loss: -0.1486085266702705
          vf_explained_var: 0.3794640302658081
          vf_loss: 0.013763191054264705
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,56,1403.19,56000,-3.7331,-0.31,-9.64,367.56


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-21_19-30-47
  done: false
  episode_len_mean: 366.5
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.722499999999967
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 156
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9845070507791307
          entropy_coeff: 0.009999999999999998
          kl: 0.013980680032921317
          policy_loss: -0.11591532097922431
          total_loss: -0.12359949077169101
          vf_explained_var: 0.715305745601654
          vf_loss: 0.009364767015601199
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,57,1427.5,57000,-3.7225,-0.31,-9.64,366.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-21_19-31-11
  done: false
  episode_len_mean: 366.54
  episode_media: {}
  episode_reward_max: -0.3099999999999791
  episode_reward_mean: -3.722899999999966
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 4
  episodes_total: 160
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8766119162241617
          entropy_coeff: 0.009999999999999998
          kl: 0.007463995670362719
          policy_loss: -0.04342284256385432
          total_loss: -0.052431420029865373
          vf_explained_var: 0.6617512702941895
          vf_loss: 0.008264742313056357
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,58,1451.5,58000,-3.7229,-0.31,-9.64,366.54


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-21_19-31-35
  done: false
  episode_len_mean: 367.0
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.757999999999966
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 2
  episodes_total: 162
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.990265056822035
          entropy_coeff: 0.009999999999999998
          kl: 0.01140665509373865
          policy_loss: -0.08861447340912289
          total_loss: -0.09569992969433466
          vf_explained_var: 0.48468950390815735
          vf_loss: 0.01053585526274724
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,59,1474.56,59000,-3.758,-2.9,-9.64,367


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-21_19-31-58
  done: false
  episode_len_mean: 366.51
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.753099999999966
  episode_reward_min: -9.639999999999967
  episodes_this_iter: 3
  episodes_total: 165
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.907264647218916
          entropy_coeff: 0.009999999999999998
          kl: 0.011334383616652843
          policy_loss: -0.15263043886257543
          total_loss: -0.16113194889492458
          vf_explained_var: 0.7306860089302063
          vf_loss: 0.008304259688076045
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,60,1497.68,60000,-3.7531,-2.9,-9.64,366.51


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-21_19-32-21
  done: false
  episode_len_mean: 365.65
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.685499999999966
  episode_reward_min: -6.599999999999968
  episodes_this_iter: 3
  episodes_total: 168
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8818542732132806
          entropy_coeff: 0.009999999999999998
          kl: 0.013687607824975117
          policy_loss: -0.13400154879523649
          total_loss: -0.14020450086229377
          vf_explained_var: 0.7122263312339783
          vf_loss: 0.00987806796717147
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,61,1521.08,61000,-3.6855,-2.9,-6.6,365.65


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-21_19-32-44
  done: false
  episode_len_mean: 365.31
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.6820999999999655
  episode_reward_min: -6.599999999999968
  episodes_this_iter: 3
  episodes_total: 171
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.852655169698927
          entropy_coeff: 0.009999999999999998
          kl: 0.027784833774426136
          policy_loss: -0.12572577902012402
          total_loss: -0.1281279705050919
          vf_explained_var: 0.6279292106628418
          vf_loss: 0.010567389852884744
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,62,1544.05,62000,-3.6821,-2.9,-6.6,365.31


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-21_19-33-07
  done: false
  episode_len_mean: 364.57
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.6456999999999664
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 3
  episodes_total: 174
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.789022327793969
          entropy_coeff: 0.009999999999999998
          kl: 0.017258653596784374
          policy_loss: -0.12080993056297303
          total_loss: -0.12498603413502375
          vf_explained_var: 0.6210938096046448
          vf_loss: 0.008536522711316745
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,63,1566.81,63000,-3.6457,-2.9,-4.55,364.57


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-21_19-33-30
  done: false
  episode_len_mean: 364.96
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.649599999999966
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 3
  episodes_total: 177
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9359246889750164
          entropy_coeff: 0.009999999999999998
          kl: 0.023425225745818245
          policy_loss: -0.019322933753331502
          total_loss: -0.024855818019972907
          vf_explained_var: 0.6785748600959778
          vf_loss: 0.006798795379129135
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,64,1589.57,64000,-3.6496,-2.9,-4.55,364.96




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-21_19-34-10
  done: false
  episode_len_mean: 364.02
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.6401999999999664
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 3
  episodes_total: 180
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9951653798421225
          entropy_coeff: 0.009999999999999998
          kl: 0.01381501595963047
          policy_loss: -0.13559356646405327
          total_loss: -0.1399110685620043
          vf_explained_var: 0.6045981049537659
          vf_loss: 0.009417394890139501
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,65,1629.93,65000,-3.6402,-2.9,-4.55,364.02


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-21_19-34-36
  done: false
  episode_len_mean: 363.28
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.6327999999999663
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 3
  episodes_total: 183
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8176592879825169
          entropy_coeff: 0.009999999999999998
          kl: 0.013169312095146463
          policy_loss: -0.15393986817863253
          total_loss: -0.16064100844992532
          vf_explained_var: 0.798921525478363
          vf_loss: 0.005549262324348092
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,66,1656.32,66000,-3.6328,-2.9,-4.55,363.28


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-21_19-35-01
  done: false
  episode_len_mean: 363.41
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.634099999999967
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 3
  episodes_total: 186
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.754891324043274
          entropy_coeff: 0.009999999999999998
          kl: 0.01146111394165585
          policy_loss: -0.12186181338296996
          total_loss: -0.12930425215098593
          vf_explained_var: 0.8468206524848938
          vf_loss: 0.004948971922405892
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,67,1680.33,67000,-3.6341,-2.9,-4.55,363.41


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-21_19-35-26
  done: false
  episode_len_mean: 361.16
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.611599999999967
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 4
  episodes_total: 190
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7996381998062134
          entropy_coeff: 0.009999999999999998
          kl: 0.012178030630403154
          policy_loss: 0.051040599329604044
          total_loss: 0.04307532285650571
          vf_explained_var: 0.8885879516601562
          vf_loss: 0.004550993377860222
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,68,1706.11,68000,-3.6116,-2.9,-4.55,361.16


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-21_19-35-51
  done: false
  episode_len_mean: 359.5
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.594999999999968
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 3
  episodes_total: 193
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.776658648914761
          entropy_coeff: 0.009999999999999998
          kl: 0.00860495438640519
          policy_loss: -0.015670580913623174
          total_loss: -0.020734242101510366
          vf_explained_var: 0.524091899394989
          vf_loss: 0.008830698002647195
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,69,1730.88,69000,-3.595,-2.9,-4.55,359.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-21_19-36-17
  done: false
  episode_len_mean: 356.94
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.569399999999968
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 3
  episodes_total: 196
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7151250229941475
          entropy_coeff: 0.009999999999999998
          kl: 0.010617842042853374
          policy_loss: 0.06458487924602296
          total_loss: 0.060127165582444936
          vf_explained_var: 0.556567370891571
          vf_loss: 0.007915510507559197
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,70,1756.35,70000,-3.5694,-2.9,-4.55,356.94


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-21_19-36-42
  done: false
  episode_len_mean: 353.93
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.539299999999969
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 3
  episodes_total: 199
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7531708505418566
          entropy_coeff: 0.009999999999999998
          kl: 0.011724906352685239
          policy_loss: -0.0494936595360438
          total_loss: -0.05890826541516516
          vf_explained_var: 0.9341296553611755
          vf_loss: 0.0028408923192829306
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,71,1781.92,71000,-3.5393,-2.9,-4.55,353.93


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-21_19-37-06
  done: false
  episode_len_mean: 351.34
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.5133999999999683
  episode_reward_min: -4.549999999999947
  episodes_this_iter: 3
  episodes_total: 202
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8543269832928975
          entropy_coeff: 0.009999999999999998
          kl: 0.012484833468773123
          policy_loss: -0.026266560206810632
          total_loss: -0.03595552560355928
          vf_explained_var: 0.9218326210975647
          vf_loss: 0.003236128459684551
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,72,1805.98,72000,-3.5134,-2.9,-4.55,351.34


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-21_19-37-30
  done: false
  episode_len_mean: 348.38
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.48379999999997
  episode_reward_min: -4.4999999999999485
  episodes_this_iter: 3
  episodes_total: 205
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9195989807446798
          entropy_coeff: 0.009999999999999998
          kl: 0.01746496835023758
          policy_loss: -0.03529327466256089
          total_loss: -0.0425175199078189
          vf_explained_var: 0.9294339418411255
          vf_loss: 0.004112508235913184
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,73,1829.7,73000,-3.4838,-2.9,-4.5,348.38


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-21_19-37-55
  done: false
  episode_len_mean: 345.23
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.45229999999997
  episode_reward_min: -4.4999999999999485
  episodes_this_iter: 3
  episodes_total: 208
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7534406450059679
          entropy_coeff: 0.009999999999999998
          kl: 0.022059097867222575
          policy_loss: -0.03403558929761251
          total_loss: -0.03883902596102821
          vf_explained_var: 0.9645445942878723
          vf_loss: 0.0028043742783160673
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,74,1854.92,74000,-3.4523,-2.9,-4.5,345.23




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-21_19-38-37
  done: false
  episode_len_mean: 342.19
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.4218999999999706
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 211
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7611312773492602
          entropy_coeff: 0.009999999999999998
          kl: 0.009828018001889186
          policy_loss: -0.173153598441018
          total_loss: -0.1767379942867491
          vf_explained_var: 0.7978947758674622
          vf_loss: 0.0073930030753318636
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,75,1896.77,75000,-3.4219,-2.9,-4.23,342.19


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-21_19-39-03
  done: false
  episode_len_mean: 339.41
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.3940999999999715
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 215
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.775638915432824
          entropy_coeff: 0.009999999999999998
          kl: 0.01394694802611285
          policy_loss: -0.12994551145368152
          total_loss: -0.13079359067810906
          vf_explained_var: 0.8677961826324463
          vf_loss: 0.007494121255715274
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,76,1922.38,76000,-3.3941,-2.9,-4.23,339.41


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-21_19-39-27
  done: false
  episode_len_mean: 337.12
  episode_media: {}
  episode_reward_max: -2.899999999999982
  episode_reward_mean: -3.3711999999999716
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 218
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8111559947331746
          entropy_coeff: 0.009999999999999998
          kl: 0.013686745008987403
          policy_loss: -0.1030316105319394
          total_loss: -0.10884830864767234
          vf_explained_var: 0.941127598285675
          vf_loss: 0.003056308264300848
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,77,1946.94,77000,-3.3712,-2.9,-4.23,337.12


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-21_19-39-52
  done: false
  episode_len_mean: 334.8
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.3479999999999723
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 221
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6723890980084737
          entropy_coeff: 0.009999999999999998
          kl: 0.012353271935467921
          policy_loss: -0.03594511664576001
          total_loss: -0.041949816275801924
          vf_explained_var: 0.9560829997062683
          vf_loss: 0.002380731110719757
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,78,1972.05,78000,-3.348,-2.88,-3.88,334.8


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-21_19-40-17
  done: false
  episode_len_mean: 333.47
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.334699999999972
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 224
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.697612342569563
          entropy_coeff: 0.009999999999999998
          kl: 0.012743004773629909
          policy_loss: 0.038573953592114976
          total_loss: 0.033573534753587514
          vf_explained_var: 0.9159453511238098
          vf_loss: 0.0033741776689162686
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,79,1996.27,79000,-3.3347,-2.88,-3.88,333.47


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-21_19-40-44
  done: false
  episode_len_mean: 331.81
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.318099999999973
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 227
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5618544989162022
          entropy_coeff: 0.009999999999999998
          kl: 0.013928507556560197
          policy_loss: -0.006660119278563394
          total_loss: -0.009587628187404738
          vf_explained_var: 0.9274080991744995
          vf_loss: 0.003289292561304238
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 8000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,80,2023.49,80000,-3.3181,-2.88,-3.88,331.81


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-21_19-41-08
  done: false
  episode_len_mean: 331.12
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.311199999999973
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 230
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7299380011028713
          entropy_coeff: 0.009999999999999998
          kl: 0.011960163002864179
          policy_loss: -0.07082239497039053
          total_loss: -0.07311199800007873
          vf_explained_var: 0.7338865399360657
          vf_loss: 0.006936666009844177
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,81,2047.55,81000,-3.3112,-2.88,-3.88,331.12


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-21_19-41-32
  done: false
  episode_len_mean: 330.21
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.3020999999999727
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 233
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7002437909444172
          entropy_coeff: 0.009999999999999998
          kl: 0.012735000165441197
          policy_loss: -0.036350368460019426
          total_loss: -0.04180424354142613
          vf_explained_var: 0.9371273517608643
          vf_loss: 0.0029524371286647187
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,82,2071.95,82000,-3.3021,-2.88,-3.88,330.21


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-21_19-41-56
  done: false
  episode_len_mean: 329.81
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.2980999999999727
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 236
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5875127845340304
          entropy_coeff: 0.009999999999999998
          kl: 0.016325715254218092
          policy_loss: -0.022360521720515357
          total_loss: -0.02019483811325497
          vf_explained_var: 0.596435010433197
          vf_loss: 0.0070209501078352336
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 8300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,83,2095.92,83000,-3.2981,-2.88,-3.88,329.81


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-21_19-42-21
  done: false
  episode_len_mean: 328.46
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.2845999999999735
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 239
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.622867586877611
          entropy_coeff: 0.009999999999999998
          kl: 0.010489769691238255
          policy_loss: -0.0337921222878827
          total_loss: -0.0383623245689604
          vf_explained_var: 0.8712539672851562
          vf_loss: 0.004577881943744918
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,84,2120.81,84000,-3.2846,-2.88,-3.88,328.46




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-21_19-43-05
  done: false
  episode_len_mean: 326.4
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.263999999999975
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 242
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6333614309628806
          entropy_coeff: 0.009999999999999998
          kl: 0.013611164999919505
          policy_loss: -0.11137625699241956
          total_loss: -0.11528905034065247
          vf_explained_var: 0.9261769652366638
          vf_loss: 0.0032332860411972636
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,85,2164.96,85000,-3.264,-2.88,-3.85,326.4


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-21_19-43-30
  done: false
  episode_len_mean: 325.9
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.258999999999974
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 245
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6507338020536635
          entropy_coeff: 0.009999999999999998
          kl: 0.010034941886881851
          policy_loss: -0.08049346150623428
          total_loss: -0.08373910047941738
          vf_explained_var: 0.793397843837738
          vf_loss: 0.006488113211364382
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,86,2189.6,86000,-3.259,-2.88,-3.85,325.9


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-21_19-43-55
  done: false
  episode_len_mean: 325.58
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.255799999999975
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 248
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6482422682974074
          entropy_coeff: 0.009999999999999998
          kl: 0.012391607387465559
          policy_loss: -0.17039236393239762
          total_loss: -0.1763384610000584
          vf_explained_var: 0.9458114504814148
          vf_loss: 0.0021719912830222812
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,87,2214.53,87000,-3.2558,-2.88,-3.85,325.58


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-21_19-44-22
  done: false
  episode_len_mean: 325.67
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.256699999999974
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 252
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5690282649464078
          entropy_coeff: 0.009999999999999998
          kl: 0.009499824938822638
          policy_loss: -0.022941428195271226
          total_loss: -0.028975855973031785
          vf_explained_var: 0.9021323919296265
          vf_loss: 0.0032434725558333514
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 8800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,88,2241.57,88000,-3.2567,-2.88,-3.85,325.67


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-21_19-44-46
  done: false
  episode_len_mean: 326.79
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.2678999999999743
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 2
  episodes_total: 254
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6750827021068997
          entropy_coeff: 0.009999999999999998
          kl: 0.011745323348224682
          policy_loss: -0.06618222329351638
          total_loss: -0.06363073347343338
          vf_explained_var: 0.489666610956192
          vf_loss: 0.011374220828939643
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,89,2265,89000,-3.2679,-2.88,-3.95,326.79


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-21_19-45-10
  done: false
  episode_len_mean: 327.38
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.273799999999974
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 3
  episodes_total: 257
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7520219087600708
          entropy_coeff: 0.009999999999999998
          kl: 0.009664379380642114
          policy_loss: -0.11438539202014605
          total_loss: -0.11999995393885507
          vf_explained_var: 0.8133434057235718
          vf_loss: 0.005382199456087417
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,90,2289.57,90000,-3.2738,-2.88,-3.95,327.38


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-21_19-45-36
  done: false
  episode_len_mean: 328.12
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.281199999999974
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 3
  episodes_total: 260
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5878500964906481
          entropy_coeff: 0.009999999999999998
          kl: 0.007571491578506734
          policy_loss: 0.10232799442278015
          total_loss: 0.09705682906011741
          vf_explained_var: 0.78887540102005
          vf_loss: 0.0054965785626942916
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,91,2315.76,91000,-3.2812,-2.88,-3.95,328.12


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-21_19-46-18
  done: false
  episode_len_mean: 327.84
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.278399999999974
  episode_reward_min: -3.9499999999999598
  episodes_this_iter: 3
  episodes_total: 263
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5622868339220684
          entropy_coeff: 0.009999999999999998
          kl: 0.013354463512514849
          policy_loss: -0.01308398296435674
          total_loss: -0.01715466868546274
          vf_explained_var: 0.9348082542419434
          vf_loss: 0.0025379218355131646
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,92,2357.4,92000,-3.2784,-2.88,-3.95,327.84


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-21_19-46-47
  done: false
  episode_len_mean: 328.43
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.2842999999999747
  episode_reward_min: -4.149999999999956
  episodes_this_iter: 3
  episodes_total: 266
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6611911217371622
          entropy_coeff: 0.009999999999999998
          kl: 0.013984002303852
          policy_loss: -0.027311735124223763
          total_loss: -0.027745522869129974
          vf_explained_var: 0.6957138180732727
          vf_loss: 0.006738920500760691
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,93,2385.84,93000,-3.2843,-2.88,-4.15,328.43


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-21_19-47-12
  done: false
  episode_len_mean: 329.65
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.296499999999974
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 2
  episodes_total: 268
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7191457682185702
          entropy_coeff: 0.009999999999999998
          kl: 0.013505825688684601
          policy_loss: -0.15599573006232578
          total_loss: -0.15333501332336003
          vf_explained_var: 0.4276440441608429
          vf_loss: 0.010735739793421493
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,94,2411.72,94000,-3.2965,-2.88,-4.18,329.65




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-21_19-47-55
  done: false
  episode_len_mean: 331.43
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.314299999999973
  episode_reward_min: -4.2199999999999545
  episodes_this_iter: 3
  episodes_total: 271
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5886069218317667
          entropy_coeff: 0.009999999999999998
          kl: 0.009487690291975387
          policy_loss: 0.0327508135802216
          total_loss: 0.03131039316455523
          vf_explained_var: 0.7071877121925354
          vf_loss: 0.008041456753077606
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,95,2454.07,95000,-3.3143,-2.88,-4.22,331.43


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-21_19-48-18
  done: false
  episode_len_mean: 333.27
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.332699999999973
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 273
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8597992777824401
          entropy_coeff: 0.009999999999999998
          kl: 0.011574251027867799
          policy_loss: -0.0947430744767189
          total_loss: -0.09692153417401844
          vf_explained_var: 0.43876615166664124
          vf_loss: 0.008606914723188513
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,96,2477.57,96000,-3.3327,-2.88,-4.62,333.27


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-21_19-48-45
  done: false
  episode_len_mean: 334.73
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.347299999999973
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 276
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7841231730249194
          entropy_coeff: 0.009999999999999998
          kl: 0.013010415136792138
          policy_loss: -0.007614829142888387
          total_loss: -0.010155757309661971
          vf_explained_var: 0.6389760375022888
          vf_loss: 0.006518274792728739
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,97,2504.19,97000,-3.3473,-2.88,-4.62,334.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-21_19-49-10
  done: false
  episode_len_mean: 336.6
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.3659999999999717
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 278
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8553438199890984
          entropy_coeff: 0.009999999999999998
          kl: 0.011049566544593069
          policy_loss: 0.0017921638157632617
          total_loss: -0.003626095006863276
          vf_explained_var: 0.38847824931144714
          vf_loss: 0.005676723069821795
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 9800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,98,2529.53,98000,-3.366,-2.88,-4.62,336.6


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-21_19-49-37
  done: false
  episode_len_mean: 338.82
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.3881999999999715
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 280
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.823193493154314
          entropy_coeff: 0.009999999999999998
          kl: 0.011343001202129974
          policy_loss: -0.08008379820320341
          total_loss: -0.08284990754392411
          vf_explained_var: 0.202444925904274
          vf_loss: 0.007809297807721628
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,99,2556.4,99000,-3.3882,-2.88,-4.62,338.82


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-21_19-50-03
  done: false
  episode_len_mean: 341.71
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.4170999999999707
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 283
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6765416198306613
          entropy_coeff: 0.009999999999999998
          kl: 0.010287904677237636
          policy_loss: 0.05228787221842342
          total_loss: 0.0531657214793894
          vf_explained_var: 0.48273319005966187
          vf_loss: 0.010698930915289869
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 1000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,100,2582.01,100000,-3.4171,-2.88,-4.62,341.71


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-21_19-50-33
  done: false
  episode_len_mean: 342.2
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.421999999999971
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 285
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.524422726366255
          entropy_coeff: 0.009999999999999998
          kl: 0.007118011638650914
          policy_loss: -0.08772384002804756
          total_loss: -0.08994325829876794
          vf_explained_var: 0.5794902443885803
          vf_loss: 0.008220147834314654
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 10100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,101,2611.86,101000,-3.422,-2.88,-4.62,342.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-21_19-51-00
  done: false
  episode_len_mean: 344.52
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.445199999999971
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 288
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.770877312289344
          entropy_coeff: 0.009999999999999998
          kl: 0.010950622387989817
          policy_loss: 0.037512409438689547
          total_loss: 0.0318484420577685
          vf_explained_var: 0.6265181303024292
          vf_loss: 0.004653137544583943
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,102,2638.82,102000,-3.4452,-2.88,-4.62,344.52


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-21_19-51-27
  done: false
  episode_len_mean: 346.62
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.4661999999999704
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 291
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7108456505669487
          entropy_coeff: 0.009999999999999998
          kl: 0.009451068811840062
          policy_loss: 0.06405104804370139
          total_loss: 0.05766386356618669
          vf_explained_var: 0.6868724226951599
          vf_loss: 0.004341797127077977
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 1030

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,103,2666.31,103000,-3.4662,-2.88,-4.62,346.62


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-21_19-51-55
  done: false
  episode_len_mean: 347.73
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.47729999999997
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 293
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.761409346262614
          entropy_coeff: 0.009999999999999998
          kl: 0.010500487429519501
          policy_loss: -0.08507671604553858
          total_loss: -0.08928972002532747
          vf_explained_var: 0.18073861300945282
          vf_loss: 0.006313257302261061
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 1040

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,104,2694.16,104000,-3.4773,-2.88,-4.62,347.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-21_19-52-21
  done: false
  episode_len_mean: 349.58
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.4957999999999703
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 296
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7641837000846863
          entropy_coeff: 0.009999999999999998
          kl: 0.009667610971264177
          policy_loss: -0.0009599348737133874
          total_loss: -0.007811731431219313
          vf_explained_var: 0.8548159003257751
          vf_loss: 0.004264403839543875
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,105,2720.41,105000,-3.4958,-2.88,-4.62,349.58


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-21_19-52-51
  done: false
  episode_len_mean: 351.03
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.5102999999999698
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 299
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7562740431891548
          entropy_coeff: 0.009999999999999998
          kl: 0.007175060868073283
          policy_loss: 0.06277348846197128
          total_loss: 0.060313392761680816
          vf_explained_var: 0.655014157295227
          vf_loss: 0.01025947879275514
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 10600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,106,2750.21,106000,-3.5103,-2.88,-4.62,351.03




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-21_19-53-36
  done: false
  episode_len_mean: 352.35
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.5234999999999688
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 302
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.723007196850247
          entropy_coeff: 0.009999999999999998
          kl: 0.008972413121857568
          policy_loss: -0.0461468104687002
          total_loss: -0.045427858498361375
          vf_explained_var: 0.3567286729812622
          vf_loss: 0.011892641141700248
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,107,2794.82,107000,-3.5235,-2.88,-4.62,352.35


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-21_19-54-01
  done: false
  episode_len_mean: 353.41
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.5340999999999685
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 304
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8178303546375698
          entropy_coeff: 0.009999999999999998
          kl: 0.011001014188769039
          policy_loss: 0.007645521892441644
          total_loss: 0.004443001829915577
          vf_explained_var: 0.5125846266746521
          vf_loss: 0.007550096118615734
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,108,2820.27,108000,-3.5341,-2.88,-4.62,353.41


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-21_19-54-25
  done: false
  episode_len_mean: 356.32
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.563199999999968
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 307
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.829928465684255
          entropy_coeff: 0.009999999999999998
          kl: 0.012614137315900997
          policy_loss: -0.03336004780398475
          total_loss: -0.03362179969747861
          vf_explained_var: 0.35329627990722656
          vf_loss: 0.00952299019942681
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 1090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,109,2844.21,109000,-3.5632,-2.88,-4.62,356.32


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-21_19-54-49
  done: false
  episode_len_mean: 357.45
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.5744999999999676
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 309
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.75730472140842
          entropy_coeff: 0.009999999999999998
          kl: 0.006185479392753616
          policy_loss: 0.044134987890720366
          total_loss: 0.03814969989988539
          vf_explained_var: 0.293763130903244
          vf_loss: 0.007412558805663138
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,110,2867.94,110000,-3.5745,-2.88,-4.62,357.45


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-21_19-55-15
  done: false
  episode_len_mean: 359.37
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.5936999999999664
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 312
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8498931659592524
          entropy_coeff: 0.009999999999999998
          kl: 0.007388139986647207
          policy_loss: 0.04091938626435068
          total_loss: 0.035551922768354415
          vf_explained_var: -0.0609564371407032
          vf_loss: 0.008144473657011986
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,111,2893.74,111000,-3.5937,-2.88,-4.62,359.37


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-21_19-55-41
  done: false
  episode_len_mean: 360.38
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.603799999999967
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 314
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6566617117987739
          entropy_coeff: 0.009999999999999998
          kl: 0.011054015652901702
          policy_loss: -0.11250258071555032
          total_loss: -0.11106710367732578
          vf_explained_var: 0.14755181968212128
          vf_loss: 0.010540633876290586
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,112,2919.92,112000,-3.6038,-2.88,-4.62,360.38


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-21_19-56-03
  done: false
  episode_len_mean: 362.74
  episode_media: {}
  episode_reward_max: -2.8799999999999826
  episode_reward_mean: -3.627399999999966
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 317
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6822173648410372
          entropy_coeff: 0.009999999999999998
          kl: 0.009253425540809504
          policy_loss: 0.004041025539239248
          total_loss: 0.0051990896463394165
          vf_explained_var: 0.2789665162563324
          vf_loss: 0.011734175979573694
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,113,2942.14,113000,-3.6274,-2.88,-4.62,362.74


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-21_19-56-28
  done: false
  episode_len_mean: 364.87
  episode_media: {}
  episode_reward_max: -2.9399999999999813
  episode_reward_mean: -3.648699999999966
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 319
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9094383001327515
          entropy_coeff: 0.009999999999999998
          kl: 0.009452763569628337
          policy_loss: -0.11542415105634266
          total_loss: -0.12067046115795771
          vf_explained_var: 0.5128559470176697
          vf_loss: 0.007467454652457187
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,114,2967.16,114000,-3.6487,-2.94,-4.62,364.87


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-21_19-56-54
  done: false
  episode_len_mean: 366.68
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.666799999999966
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 322
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7485107011265224
          entropy_coeff: 0.009999999999999998
          kl: 0.008787533672437724
          policy_loss: -0.007612153970532947
          total_loss: -0.009824752228127585
          vf_explained_var: 0.4154188334941864
          vf_loss: 0.009340921599262704
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,115,2992.53,115000,-3.6668,-2.96,-4.62,366.68


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-21_19-57-22
  done: false
  episode_len_mean: 368.08
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.680799999999966
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 324
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.774581205844879
          entropy_coeff: 0.009999999999999998
          kl: 0.00954129687696644
          policy_loss: -0.11455721706151963
          total_loss: -0.11615499961707328
          vf_explained_var: 0.330475389957428
          vf_loss: 0.00970765218242175
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,116,3020.56,116000,-3.6808,-2.96,-4.62,368.08


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-21_19-57-50
  done: false
  episode_len_mean: 370.46
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.7045999999999646
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 327
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.750360295507643
          entropy_coeff: 0.009999999999999998
          kl: 0.010229883145609787
          policy_loss: -0.04675368327233526
          total_loss: -0.04984231276644601
          vf_explained_var: 0.18529391288757324
          vf_loss: 0.007509802734582788
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,117,3048.35,117000,-3.7046,-2.96,-4.62,370.46




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-21_19-58-32
  done: false
  episode_len_mean: 371.81
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.7180999999999647
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 330
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.847240924835205
          entropy_coeff: 0.009999999999999998
          kl: 0.008775943006681964
          policy_loss: 0.020769144346316656
          total_loss: 0.01619096663263109
          vf_explained_var: 0.23511812090873718
          vf_loss: 0.007970466610923823
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 1180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,118,3090.96,118000,-3.7181,-2.96,-4.62,371.81


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-21_19-59-05
  done: false
  episode_len_mean: 372.78
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.727799999999965
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 332
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7840480897161695
          entropy_coeff: 0.009999999999999998
          kl: 0.007543980298829488
          policy_loss: -0.11097852033045556
          total_loss: -0.11272697606020504
          vf_explained_var: 0.4655277132987976
          vf_loss: 0.010999835555493418
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 1190

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,119,3123.67,119000,-3.7278,-2.96,-4.62,372.78


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-21_19-59-35
  done: false
  episode_len_mean: 373.81
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.7380999999999642
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 335
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7792821884155274
          entropy_coeff: 0.009999999999999998
          kl: 0.009985876547491302
          policy_loss: -0.019977965205907822
          total_loss: -0.02063113475839297
          vf_explained_var: 0.5690925121307373
          vf_loss: 0.010399183073443258
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,120,3153.64,120000,-3.7381,-2.96,-4.62,373.81


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-21_20-00-02
  done: false
  episode_len_mean: 374.34
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.7433999999999634
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 338
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7643639167149863
          entropy_coeff: 0.009999999999999998
          kl: 0.010289313119310231
          policy_loss: -0.02617086172103882
          total_loss: -0.025438368486033546
          vf_explained_var: -0.0695653185248375
          vf_loss: 0.011430847062729298
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,121,3180.6,121000,-3.7434,-2.96,-4.62,374.34


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-21_20-00-30
  done: false
  episode_len_mean: 375.93
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.7592999999999632
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 341
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8270638492372302
          entropy_coeff: 0.009999999999999998
          kl: 0.008870821051451866
          policy_loss: -0.011909172518385781
          total_loss: -0.016151626573668587
          vf_explained_var: 0.632205069065094
          vf_loss: 0.008040380427458634
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,122,3209.1,122000,-3.7593,-2.96,-4.62,375.93


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-21_20-00-57
  done: false
  episode_len_mean: 376.76
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.7675999999999634
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 343
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8158988793691
          entropy_coeff: 0.009999999999999998
          kl: 0.010288687287801837
          policy_loss: -0.13609144025378758
          total_loss: -0.1449117393957244
          vf_explained_var: 0.7783871293067932
          vf_loss: 0.0023938243529604128
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,123,3235.44,123000,-3.7676,-2.96,-4.62,376.76


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-21_20-01-24
  done: false
  episode_len_mean: 377.14
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.7713999999999634
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 346
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7542603386773004
          entropy_coeff: 0.009999999999999998
          kl: 0.011257231260314802
          policy_loss: -0.09330875972906748
          total_loss: -0.08993862494826317
          vf_explained_var: 0.40646666288375854
          vf_loss: 0.013314104005176988
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,124,3262.99,124000,-3.7714,-2.96,-4.62,377.14


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-21_20-01-51
  done: false
  episode_len_mean: 378.4
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.783999999999963
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 349
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7611743463410272
          entropy_coeff: 0.009999999999999998
          kl: 0.010601608458577709
          policy_loss: 0.00363114294078615
          total_loss: 0.0025654060973061455
          vf_explained_var: 0.3454798460006714
          vf_loss: 0.009389924589130613
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 12500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,125,3289.29,125000,-3.784,-2.96,-4.62,378.4


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-21_20-02-19
  done: false
  episode_len_mean: 379.73
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.7972999999999626
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 352
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7612710899776882
          entropy_coeff: 0.009999999999999998
          kl: 0.009736842260675754
          policy_loss: 0.03256858959794044
          total_loss: 0.025670911040571003
          vf_explained_var: 0.6357409358024597
          vf_loss: 0.004142666213576578
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,126,3317.95,126000,-3.7973,-3.02,-4.62,379.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-21_20-02-46
  done: false
  episode_len_mean: 380.03
  episode_media: {}
  episode_reward_max: -3.0199999999999796
  episode_reward_mean: -3.8002999999999627
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 354
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8932478944460551
          entropy_coeff: 0.009999999999999998
          kl: 0.010950378584156754
          policy_loss: -0.052752756244606444
          total_loss: -0.05912778104345004
          vf_explained_var: 0.8939918279647827
          vf_loss: 0.005165949960549672
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,127,3344.19,127000,-3.8003,-3.02,-4.62,380.03


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-21_20-03-13
  done: false
  episode_len_mean: 380.81
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.8080999999999623
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 357
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.876827727423774
          entropy_coeff: 0.009999999999999998
          kl: 0.010663931627965464
          policy_loss: -0.034548917495542106
          total_loss: -0.04354866908656226
          vf_explained_var: 0.9442715048789978
          vf_loss: 0.002570370570497794
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,128,3371.17,128000,-3.8081,-3.07,-4.62,380.81




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-21_20-03-56
  done: false
  episode_len_mean: 380.85
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.8084999999999627
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 360
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8296072867181565
          entropy_coeff: 0.009999999999999998
          kl: 0.011134066314359044
          policy_loss: -0.0919903292424149
          total_loss: -0.09916773910323778
          vf_explained_var: 0.8877294063568115
          vf_loss: 0.003603170209357308
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,129,3414.82,129000,-3.8085,-3.07,-4.62,380.85


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-21_20-04-25
  done: false
  episode_len_mean: 381.88
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.8187999999999622
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 362
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8981634656588235
          entropy_coeff: 0.009999999999999998
          kl: 0.016341147453607485
          policy_loss: -0.1036822173330519
          total_loss: -0.1005121524963114
          vf_explained_var: 0.6805427670478821
          vf_loss: 0.0111214237442861
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,130,3442.97,130000,-3.8188,-3.07,-4.62,381.88


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-21_20-04-49
  done: false
  episode_len_mean: 383.9
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.838999999999962
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 3
  episodes_total: 365
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.984800069861942
          entropy_coeff: 0.009999999999999998
          kl: 0.011136595673429442
          policy_loss: -0.01628938148626023
          total_loss: -0.02497022259566519
          vf_explained_var: 0.7886059284210205
          vf_loss: 0.0036499570861148337
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 1310

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,131,3467.26,131000,-3.839,-3.07,-4.62,383.9


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-21_20-05-12
  done: false
  episode_len_mean: 385.17
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.851699999999962
  episode_reward_min: -4.619999999999946
  episodes_this_iter: 2
  episodes_total: 367
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.018728154235416
          entropy_coeff: 0.009999999999999998
          kl: 0.015363258024063638
          policy_loss: 0.03286639071173138
          total_loss: 0.02694117178519567
          vf_explained_var: 0.8617231845855713
          vf_loss: 0.0038918645654727395
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 13200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,132,3490.85,132000,-3.8517,-3.07,-4.62,385.17


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-21_20-05-33
  done: false
  episode_len_mean: 385.52
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.8551999999999613
  episode_reward_min: -4.629999999999946
  episodes_this_iter: 2
  episodes_total: 369
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.721878465016683
          entropy_coeff: 0.009999999999999998
          kl: 0.014549991159373476
          policy_loss: -0.09151605135864682
          total_loss: -0.08889519315626886
          vf_explained_var: 0.6295916438102722
          vf_loss: 0.010018395212116754
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,133,3511.03,133000,-3.8552,-3.07,-4.63,385.52


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-21_20-05-56
  done: false
  episode_len_mean: 386.66
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.8665999999999623
  episode_reward_min: -4.90999999999994
  episodes_this_iter: 3
  episodes_total: 372
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9262057291136847
          entropy_coeff: 0.009999999999999998
          kl: 0.011239578269929319
          policy_loss: -0.04242124781012535
          total_loss: -0.04702823220027818
          vf_explained_var: 0.6636430025100708
          vf_loss: 0.007068354874435398
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,134,3534.55,134000,-3.8666,-3.07,-4.91,386.66


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-21_20-06-19
  done: false
  episode_len_mean: 387.0
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.8699999999999615
  episode_reward_min: -4.90999999999994
  episodes_this_iter: 2
  episodes_total: 374
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9462492730882432
          entropy_coeff: 0.009999999999999998
          kl: 0.013427210704804413
          policy_loss: 0.08385181377331416
          total_loss: 0.07679061740636825
          vf_explained_var: 0.34897324442863464
          vf_loss: 0.003337930397052939
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 13500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,135,3557.48,135000,-3.87,-3.07,-4.91,387


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-21_20-06-44
  done: false
  episode_len_mean: 387.29
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.8728999999999614
  episode_reward_min: -4.90999999999994
  episodes_this_iter: 2
  episodes_total: 376
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9100001321898565
          entropy_coeff: 0.009999999999999998
          kl: 0.010058607832678066
          policy_loss: -0.15408888889683617
          total_loss: -0.16305575453572804
          vf_explained_var: 0.8783061504364014
          vf_loss: 0.0033435709942649635
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,136,3582.06,136000,-3.8729,-3.07,-4.91,387.29


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-21_20-07-10
  done: false
  episode_len_mean: 386.68
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.8667999999999614
  episode_reward_min: -4.90999999999994
  episodes_this_iter: 3
  episodes_total: 379
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8385029872258505
          entropy_coeff: 0.009999999999999998
          kl: 0.01336019670274307
          policy_loss: -0.017708688125842146
          total_loss: -0.02409726025329696
          vf_explained_var: 0.7211112976074219
          vf_loss: 0.0029783223387009152
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,137,3607.92,137000,-3.8668,-3.07,-4.91,386.68


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-21_20-07-33
  done: false
  episode_len_mean: 386.58
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.865799999999961
  episode_reward_min: -4.90999999999994
  episodes_this_iter: 2
  episodes_total: 381
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8728614542219373
          entropy_coeff: 0.009999999999999998
          kl: 0.0163929443795458
          policy_loss: -0.1599961398376359
          total_loss: -0.16452438789937232
          vf_explained_var: 0.8799504637718201
          vf_loss: 0.0031351308092578416
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,138,3631.56,138000,-3.8658,-3.07,-4.91,386.58


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-21_20-07-55
  done: false
  episode_len_mean: 387.96
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.8795999999999604
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 383
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0045690735181174
          entropy_coeff: 0.009999999999999998
          kl: 0.014901557006504224
          policy_loss: -0.09246994157632192
          total_loss: -0.09529456595579783
          vf_explained_var: 0.4001396894454956
          vf_loss: 0.007162513940905531
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,139,3653.18,139000,-3.8796,-3.07,-5.54,387.96


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-21_20-08-16
  done: false
  episode_len_mean: 391.12
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.91119999999996
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 385
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0528016090393066
          entropy_coeff: 0.009999999999999998
          kl: 0.013388048046699592
          policy_loss: -0.03351361552874247
          total_loss: -0.042745577295621236
          vf_explained_var: 0.6644542217254639
          vf_loss: 0.00225912053267368
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 1400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,140,3674.34,140000,-3.9112,-3.07,-5.54,391.12


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-21_20-08-38
  done: false
  episode_len_mean: 392.6
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.9259999999999606
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 387
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9254741562737359
          entropy_coeff: 0.009999999999999998
          kl: 0.018044946933184154
          policy_loss: -0.13871926963329315
          total_loss: -0.14268935786353218
          vf_explained_var: 0.8915455341339111
          vf_loss: 0.0031043127987585547
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,141,3696.57,141000,-3.926,-3.07,-5.54,392.6


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-21_20-09-00
  done: false
  episode_len_mean: 395.09
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.95089999999996
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 389
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.993272340297699
          entropy_coeff: 0.009999999999999998
          kl: 0.013600774073767042
          policy_loss: -0.08201627681652705
          total_loss: -0.09067837016450034
          vf_explained_var: 0.6092401146888733
          vf_loss: 0.002090107177435938
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 14200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,142,3717.84,142000,-3.9509,-3.07,-5.54,395.09




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-21_20-09-40
  done: false
  episode_len_mean: 396.45
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.9644999999999597
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 391
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9399574716885886
          entropy_coeff: 0.009999999999999998
          kl: 0.010940909920922406
          policy_loss: -0.11911661633186871
          total_loss: -0.12878670675887002
          vf_explained_var: 0.9235522150993347
          vf_loss: 0.0023443699174094945
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,143,3758.05,143000,-3.9645,-3.07,-5.54,396.45


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-21_20-10-00
  done: false
  episode_len_mean: 399.34
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -3.993399999999959
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 393
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0663515779707167
          entropy_coeff: 0.009999999999999998
          kl: 0.01108272074653498
          policy_loss: -0.12524331741862826
          total_loss: -0.1313438536392318
          vf_explained_var: 0.07557974755764008
          vf_loss: 0.007082143075563686
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 1440

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,144,3778.18,144000,-3.9934,-3.07,-5.54,399.34


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-21_20-10-19
  done: false
  episode_len_mean: 402.08
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.020799999999959
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 395
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.874463763501909
          entropy_coeff: 0.009999999999999998
          kl: 0.015135289777197386
          policy_loss: -0.062046399464209874
          total_loss: -0.06706442683935165
          vf_explained_var: 0.9077306389808655
          vf_loss: 0.0035102854566907305
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,145,3797.11,145000,-4.0208,-3.07,-5.54,402.08


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-21_20-10-40
  done: false
  episode_len_mean: 404.49
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.0448999999999575
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 397
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0361708588070337
          entropy_coeff: 0.009999999999999998
          kl: 0.011258871053425304
          policy_loss: -0.07218389080630408
          total_loss: -0.08216878328886297
          vf_explained_var: 0.8215012550354004
          vf_loss: 0.0027770730912581912
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,146,3818.22,146000,-4.0449,-3.07,-5.54,404.49


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-21_20-11-01
  done: false
  episode_len_mean: 407.52
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.075199999999956
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 399
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9217669076389736
          entropy_coeff: 0.009999999999999998
          kl: 0.01609710611093692
          policy_loss: -0.08349425991376241
          total_loss: -0.08188786821232902
          vf_explained_var: 0.615752637386322
          vf_loss: 0.009958513075899747
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 14700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,147,3838.72,147000,-4.0752,-3.07,-5.54,407.52


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-21_20-11-24
  done: false
  episode_len_mean: 409.96
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.099599999999956
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 3
  episodes_total: 402
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8522197710143196
          entropy_coeff: 0.009999999999999998
          kl: 0.013721412278313987
          policy_loss: -0.14225716474983427
          total_loss: -0.14377478294902377
          vf_explained_var: 0.5676441192626953
          vf_loss: 0.007742624683305621
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,148,3862.39,148000,-4.0996,-3.07,-5.54,409.96


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-21_20-11-48
  done: false
  episode_len_mean: 410.91
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.109099999999956
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 404
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8261810037824842
          entropy_coeff: 0.009999999999999998
          kl: 0.012974738846280736
          policy_loss: 0.101591331180599
          total_loss: 0.09404165657858053
          vf_explained_var: 0.4373360872268677
          vf_loss: 0.0019541842387601114
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,149,3885.75,149000,-4.1091,-3.07,-5.54,410.91


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-21_20-12-11
  done: false
  episode_len_mean: 411.88
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.118799999999956
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 406
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9595688965585496
          entropy_coeff: 0.009999999999999998
          kl: 0.013856522775528804
          policy_loss: -0.03763953381114536
          total_loss: -0.0434476919265257
          vf_explained_var: 0.5211743116378784
          vf_loss: 0.004434377746656537
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 1500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,150,3909.21,150000,-4.1188,-3.07,-5.54,411.88


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-21_20-12-34
  done: false
  episode_len_mean: 412.23
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.1222999999999566
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 408
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8109898726145426
          entropy_coeff: 0.009999999999999998
          kl: 0.012841080270814981
          policy_loss: -0.24094852151142226
          total_loss: -0.24534452847308583
          vf_explained_var: 0.8325231075286865
          vf_loss: 0.005046161018415458
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,151,3932.44,151000,-4.1223,-3.07,-5.54,412.23


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-21_20-12-59
  done: false
  episode_len_mean: 412.7
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.126999999999957
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 3
  episodes_total: 411
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7942807899581061
          entropy_coeff: 0.009999999999999998
          kl: 0.0120594023373756
          policy_loss: -0.011284977694352468
          total_loss: -0.01605472829606798
          vf_explained_var: 0.7155802249908447
          vf_loss: 0.005032961628037609
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 15200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,152,3956.63,152000,-4.127,-3.07,-5.54,412.7


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-21_20-13-23
  done: false
  episode_len_mean: 413.64
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.136399999999957
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 413
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7803523580233256
          entropy_coeff: 0.009999999999999998
          kl: 0.01097894056370282
          policy_loss: -0.030321122871504888
          total_loss: -0.035524192286862266
          vf_explained_var: 0.7925684452056885
          vf_loss: 0.005189671900330318
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,153,3980.42,153000,-4.1364,-3.07,-5.54,413.64


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-21_20-13-44
  done: false
  episode_len_mean: 415.26
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.152599999999956
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 415
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8549485405286152
          entropy_coeff: 0.009999999999999998
          kl: 0.01259375153100242
          policy_loss: -0.13110278424703412
          total_loss: -0.13628613899151484
          vf_explained_var: 0.793481171131134
          vf_loss: 0.00486535418442347
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,154,4001.82,154000,-4.1526,-3.07,-5.54,415.26


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-21_20-14-07
  done: false
  episode_len_mean: 417.17
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.171699999999956
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 417
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7347839381959704
          entropy_coeff: 0.009999999999999998
          kl: 0.013621129302934893
          policy_loss: -0.21641272753477098
          total_loss: -0.21928636282682418
          vf_explained_var: 0.8403806090354919
          vf_loss: 0.005279939912078488
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,155,4024.76,155000,-4.1717,-3.07,-5.54,417.17




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-21_20-14-46
  done: false
  episode_len_mean: 418.18
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.181799999999955
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 3
  episodes_total: 420
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5919799433814155
          entropy_coeff: 0.009999999999999998
          kl: 0.00987156614400821
          policy_loss: -0.09025455233123568
          total_loss: -0.0938754700952106
          vf_explained_var: 0.7233960628509521
          vf_loss: 0.0056355791813176535
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 1560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,156,4063.53,156000,-4.1818,-3.07,-5.54,418.18


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-21_20-15-10
  done: false
  episode_len_mean: 419.42
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.194199999999955
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 422
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6759228295750088
          entropy_coeff: 0.009999999999999998
          kl: 0.012661924502539692
          policy_loss: 0.1022762102385362
          total_loss: 0.09724218845367431
          vf_explained_var: 0.8806154131889343
          vf_loss: 0.003178410235300867
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,157,4087.72,157000,-4.1942,-3.07,-5.54,419.42


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-21_20-15-33
  done: false
  episode_len_mean: 421.05
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.210499999999954
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 424
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.785823608769311
          entropy_coeff: 0.009999999999999998
          kl: 0.019256833420319343
          policy_loss: 0.017444659935103524
          total_loss: 0.016055497692690955
          vf_explained_var: 0.5012524127960205
          vf_loss: 0.003470710743891282
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 1580

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,158,4110.53,158000,-4.2105,-3.07,-5.54,421.05


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-21_20-15-56
  done: false
  episode_len_mean: 421.71
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.217099999999954
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 426
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.605957990222507
          entropy_coeff: 0.009999999999999998
          kl: 0.014123516409076508
          policy_loss: -0.16739065241482523
          total_loss: -0.1710142806586292
          vf_explained_var: 0.9170829057693481
          vf_loss: 0.002902578761697643
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 15900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,159,4133.36,159000,-4.2171,-3.07,-5.54,421.71


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-21_20-16-19
  done: false
  episode_len_mean: 423.36
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.233599999999954
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 3
  episodes_total: 429
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6096563551161025
          entropy_coeff: 0.009999999999999998
          kl: 0.01352076904949883
          policy_loss: 0.020315307958258522
          total_loss: 0.017900158961613972
          vf_explained_var: 0.808492124080658
          vf_loss: 0.004554896756437504
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 16000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,160,4156.71,160000,-4.2336,-3.07,-5.54,423.36


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-21_20-16-42
  done: false
  episode_len_mean: 424.48
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.2447999999999535
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 431
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8229707585440742
          entropy_coeff: 0.009999999999999998
          kl: 0.013922012960405026
          policy_loss: 0.03842991408374574
          total_loss: 0.03342998375495275
          vf_explained_var: 0.36459460854530334
          vf_loss: 0.0038324159059104405
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,161,4180.08,161000,-4.2448,-3.07,-5.54,424.48


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-21_20-17-05
  done: false
  episode_len_mean: 426.5
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.2649999999999535
  episode_reward_min: -5.539999999999926
  episodes_this_iter: 2
  episodes_total: 433
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7054019252459207
          entropy_coeff: 0.009999999999999998
          kl: 0.010849721157929398
          policy_loss: -0.06885756187968783
          total_loss: -0.07341295364830229
          vf_explained_var: 0.7198887467384338
          vf_loss: 0.005175065858768196
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,162,4202.54,162000,-4.265,-3.07,-5.54,426.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-21_20-17-25
  done: false
  episode_len_mean: 429.45
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.294499999999953
  episode_reward_min: -5.549999999999926
  episodes_this_iter: 2
  episodes_total: 435
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4822293334537082
          entropy_coeff: 0.009999999999999998
          kl: 0.009767853269656178
          policy_loss: -0.04659725551803907
          total_loss: -0.04894050069981151
          vf_explained_var: 0.16413085162639618
          vf_loss: 0.005885752649757907
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,163,4222.49,163000,-4.2945,-3.07,-5.55,429.45


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-21_20-17-50
  done: false
  episode_len_mean: 431.14
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.311399999999952
  episode_reward_min: -5.549999999999926
  episodes_this_iter: 3
  episodes_total: 438
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.49455273548762
          entropy_coeff: 0.009999999999999998
          kl: 0.008588304105649907
          policy_loss: -0.03698330140776104
          total_loss: -0.03950633224513796
          vf_explained_var: 0.7330474257469177
          vf_loss: 0.006625390147221171
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 16400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,164,4247.47,164000,-4.3114,-3.07,-5.55,431.14


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-21_20-18-11
  done: false
  episode_len_mean: 433.3
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.332999999999952
  episode_reward_min: -5.549999999999926
  episodes_this_iter: 2
  episodes_total: 440
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6168416765001086
          entropy_coeff: 0.009999999999999998
          kl: 0.010695358012236393
          policy_loss: 0.012686432815260357
          total_loss: 0.008011821202105945
          vf_explained_var: 0.4470149278640747
          vf_loss: 0.0042744367163524855
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,165,4268.81,165000,-4.333,-3.07,-5.55,433.3


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-21_20-18-33
  done: false
  episode_len_mean: 434.73
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.347299999999951
  episode_reward_min: -5.549999999999926
  episodes_this_iter: 2
  episodes_total: 442
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6283488790194194
          entropy_coeff: 0.009999999999999998
          kl: 0.012143154696756753
          policy_loss: 0.034840885466999476
          total_loss: 0.03381567531161838
          vf_explained_var: 0.28987523913383484
          vf_loss: 0.007061651699607157
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,166,4290.25,166000,-4.3473,-3.07,-5.55,434.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-21_20-18-55
  done: false
  episode_len_mean: 436.62
  episode_media: {}
  episode_reward_max: -3.0699999999999785
  episode_reward_mean: -4.366199999999951
  episode_reward_min: -5.549999999999926
  episodes_this_iter: 2
  episodes_total: 444
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6655850410461426
          entropy_coeff: 0.009999999999999998
          kl: 0.013115652058517835
          policy_loss: -0.11472271821565098
          total_loss: -0.11571007370948791
          vf_explained_var: 0.6472861170768738
          vf_loss: 0.006815427002341797
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,167,4312.23,167000,-4.3662,-3.07,-5.55,436.62


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-21_20-19-14
  done: false
  episode_len_mean: 440.16
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.40159999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 446
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.659861813651191
          entropy_coeff: 0.009999999999999998
          kl: 0.009988556405910703
          policy_loss: -0.07825696178608471
          total_loss: -0.08320243178556362
          vf_explained_var: 0.24101798236370087
          vf_loss: 0.004910873855826342
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 1680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,168,4331.7,168000,-4.4016,-3.3,-6.4,440.16


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-21_20-19-35
  done: false
  episode_len_mean: 442.24
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.42239999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 448
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.65183893971973
          entropy_coeff: 0.009999999999999998
          kl: 0.010006095136982899
          policy_loss: -0.0960389532148838
          total_loss: -0.102244876159562
          vf_explained_var: 0.3743036091327667
          vf_loss: 0.003558351539767399
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,169,4352.35,169000,-4.4224,-3.3,-6.4,442.24




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-21_20-20-15
  done: false
  episode_len_mean: 444.23
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.44229999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 450
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6882088396284316
          entropy_coeff: 0.009999999999999998
          kl: 0.018773510444455285
          policy_loss: -0.10693675668703186
          total_loss: -0.10546580391625564
          vf_explained_var: 0.8593601584434509
          vf_loss: 0.005680919924957885
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 1700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,170,4392.46,170000,-4.4423,-3.3,-6.4,444.23


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-21_20-20-39
  done: false
  episode_len_mean: 446.31
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.463099999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 3
  episodes_total: 453
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4940922061602275
          entropy_coeff: 0.009999999999999998
          kl: 0.012640010198728093
          policy_loss: -0.006280012594328986
          total_loss: -0.00905369259417057
          vf_explained_var: 0.9292841553688049
          vf_loss: 0.0036352345732868544
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,171,4416.74,171000,-4.4631,-3.3,-6.4,446.31


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-21_20-21-04
  done: false
  episode_len_mean: 446.6
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.465999999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 455
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.589193332195282
          entropy_coeff: 0.009999999999999998
          kl: 0.02345539363427657
          policy_loss: -0.06836523032850689
          total_loss: -0.06308170424567329
          vf_explained_var: 0.7835512161254883
          vf_loss: 0.0053430657096517585
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 17200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,172,4441.9,172000,-4.466,-3.3,-6.4,446.6


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-21_20-21-28
  done: false
  episode_len_mean: 448.15
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.481499999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 3
  episodes_total: 458
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5664546449979146
          entropy_coeff: 0.009999999999999998
          kl: 0.00877159976402015
          policy_loss: -0.004102869000699785
          total_loss: -0.008804197278287676
          vf_explained_var: 0.943860650062561
          vf_loss: 0.002081969730918192
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,173,4465.92,173000,-4.4815,-3.47,-6.4,448.15


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-21_20-21-53
  done: false
  episode_len_mean: 448.7
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.486999999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 460
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6100574519899156
          entropy_coeff: 0.009999999999999998
          kl: 0.012267176940595408
          policy_loss: -0.006534375995397568
          total_loss: -0.008076133413447274
          vf_explained_var: 0.9378746747970581
          vf_loss: 0.0021382943144999445
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,174,4490.63,174000,-4.487,-3.47,-6.4,448.7


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-21_20-22-18
  done: false
  episode_len_mean: 448.85
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.488499999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 3
  episodes_total: 463
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5228741394148932
          entropy_coeff: 0.009999999999999998
          kl: 0.007848629648733861
          policy_loss: -0.01568569971455468
          total_loss: -0.01971391588449478
          vf_explained_var: 0.9184638857841492
          vf_loss: 0.0032537859528222017
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,175,4515.69,175000,-4.4885,-3.47,-6.4,448.85


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-21_20-22-43
  done: false
  episode_len_mean: 448.43
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.484299999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 465
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6308146715164185
          entropy_coeff: 0.009999999999999998
          kl: 0.008693048088515098
          policy_loss: -0.039886906639569335
          total_loss: -0.044144174001283114
          vf_explained_var: 0.923541247844696
          vf_loss: 0.0032491685424853737
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,176,4540.34,176000,-4.4843,-3.47,-6.4,448.43


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-21_20-23-08
  done: false
  episode_len_mean: 447.81
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.478099999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 3
  episodes_total: 468
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5157599382930331
          entropy_coeff: 0.009999999999999998
          kl: 0.008211699660964753
          policy_loss: -0.09206119328737258
          total_loss: -0.0945090886619356
          vf_explained_var: 0.8897733092308044
          vf_loss: 0.004395356353941477
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,177,4565.21,177000,-4.4781,-3.47,-6.4,447.81


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-21_20-23-31
  done: false
  episode_len_mean: 446.74
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.467399999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 470
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5590523838996888
          entropy_coeff: 0.009999999999999998
          kl: 0.00807408830140209
          policy_loss: 0.05646333032184177
          total_loss: 0.05084997351384825
          vf_explained_var: 0.9577431678771973
          vf_loss: 0.0018021528204877136
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,178,4588.03,178000,-4.4674,-3.47,-6.4,446.74


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-21_20-23-56
  done: false
  episode_len_mean: 445.3
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.45299999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 3
  episodes_total: 473
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5048082921240065
          entropy_coeff: 0.009999999999999998
          kl: 0.0072321794435290425
          policy_loss: -0.07310117267900043
          total_loss: -0.07749543074104522
          vf_explained_var: 0.890488862991333
          vf_loss: 0.0033312419248330925
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,179,4613.32,179000,-4.453,-3.47,-6.4,445.3


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-21_20-24-20
  done: false
  episode_len_mean: 444.59
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.445899999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 475
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5026196546024746
          entropy_coeff: 0.009999999999999998
          kl: 0.009028913805103992
          policy_loss: -0.015203424874279234
          total_loss: -0.018918184749782085
          vf_explained_var: 0.9083272218704224
          vf_loss: 0.002169663195187847
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,180,4637.64,180000,-4.4459,-3.47,-6.4,444.59


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-21_20-24-46
  done: false
  episode_len_mean: 444.72
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.44719999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 3
  episodes_total: 478
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4442136181725396
          entropy_coeff: 0.009999999999999998
          kl: 0.0063403055068335665
          policy_loss: -0.1745238231288062
          total_loss: -0.18093461038337813
          vf_explained_var: 0.9716921448707581
          vf_loss: 0.0016117898934882961
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,181,4663.44,181000,-4.4472,-3.47,-6.4,444.72




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-21_20-25-26
  done: false
  episode_len_mean: 445.13
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.45129999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 480
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.514094066619873
          entropy_coeff: 0.009999999999999998
          kl: 0.012548123880705435
          policy_loss: 0.09954904218514761
          total_loss: 0.10371940926545196
          vf_explained_var: 0.636283278465271
          vf_loss: 0.006606334711088695
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,182,4703.35,182000,-4.4513,-3.47,-6.4,445.13


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-21_20-25-49
  done: false
  episode_len_mean: 444.69
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.446899999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 482
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.489677005343967
          entropy_coeff: 0.009999999999999998
          kl: 0.00883880816783981
          policy_loss: -0.012363350060251023
          total_loss: -0.012443852755758497
          vf_explained_var: 0.7718408107757568
          vf_loss: 0.005866974482261058
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,183,4726.45,183000,-4.4469,-3.47,-6.4,444.69


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-21_20-26-10
  done: false
  episode_len_mean: 445.19
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.45189999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 484
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.440977864795261
          entropy_coeff: 0.009999999999999998
          kl: 0.007714093540193037
          policy_loss: 0.025975156616833476
          total_loss: 0.027080045971605512
          vf_explained_var: 0.7298194766044617
          vf_loss: 0.007704151306663536
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,184,4747.26,184000,-4.4519,-3.47,-6.4,445.19


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-21_20-26-33
  done: false
  episode_len_mean: 444.51
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.44509999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 486
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5207295603222317
          entropy_coeff: 0.009999999999999998
          kl: 0.00832126378563253
          policy_loss: -0.03359204911523395
          total_loss: -0.03319776819811927
          vf_explained_var: 0.32322970032691956
          vf_loss: 0.007176296676819524
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,185,4769.97,185000,-4.4451,-3.47,-6.4,444.51


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-21_20-26-55
  done: false
  episode_len_mean: 443.86
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.43859999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 488
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6209528247515361
          entropy_coeff: 0.009999999999999998
          kl: 0.015883775101779622
          policy_loss: -0.12991484685076607
          total_loss: -0.1228571461306678
          vf_explained_var: 0.6948795914649963
          vf_loss: 0.00718490937207308
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,186,4792.17,186000,-4.4386,-3.47,-6.4,443.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-21_20-27-17
  done: false
  episode_len_mean: 444.0
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.43999999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 3
  episodes_total: 491
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.564681315422058
          entropy_coeff: 0.009999999999999998
          kl: 0.0077049924951512515
          policy_loss: 0.09342755493190553
          total_loss: 0.08896308756536908
          vf_explained_var: 0.5204886794090271
          vf_loss: 0.0033810407220799888
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,187,4814.16,187000,-4.44,-3.47,-6.4,444


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-21_20-27-41
  done: false
  episode_len_mean: 440.65
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.406499999999951
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 493
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5151772763994005
          entropy_coeff: 0.009999999999999998
          kl: 0.005752167148486759
          policy_loss: -0.1610805683251884
          total_loss: -0.15953966875871023
          vf_explained_var: 0.07961095124483109
          vf_loss: 0.010868598648812622
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,188,4838.3,188000,-4.4065,-3.33,-6.4,440.65


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-21_20-28-00
  done: false
  episode_len_mean: 441.28
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.41279999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 495
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.73355014456643
          entropy_coeff: 0.009999999999999998
          kl: 0.0058130841962131955
          policy_loss: -0.10620926519234976
          total_loss: -0.11067495495080948
          vf_explained_var: -0.032323796302080154
          vf_loss: 0.006984064930778308
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,189,4856.96,189000,-4.4128,-3.33,-6.4,441.28


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-21_20-28-22
  done: false
  episode_len_mean: 439.99
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.399899999999951
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 497
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.641976045237647
          entropy_coeff: 0.009999999999999998
          kl: 0.009079749126236594
          policy_loss: -0.07657364213632213
          total_loss: -0.07935937055283122
          vf_explained_var: 0.8013232946395874
          vf_loss: 0.004440782657669237
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,190,4878.61,190000,-4.3999,-3.33,-6.4,439.99


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-21_20-28-41
  done: false
  episode_len_mean: 441.12
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.41119999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 499
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7439887616369458
          entropy_coeff: 0.009999999999999998
          kl: 0.009460900767451522
          policy_loss: -0.059126893017027116
          total_loss: -0.058803168104754555
          vf_explained_var: -0.029999587684869766
          vf_loss: 0.008184445658116601
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,191,4898.55,191000,-4.4112,-3.33,-6.4,441.12


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-21_20-29-04
  done: false
  episode_len_mean: 440.32
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.40319999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 501
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.507932256327735
          entropy_coeff: 0.009999999999999998
          kl: 0.008585579270483616
          policy_loss: -0.028942386474874286
          total_loss: -0.030860565271642472
          vf_explained_var: 0.8260666728019714
          vf_loss: 0.0044682478713285595
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,192,4920.7,192000,-4.4032,-3.33,-6.4,440.32


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-21_20-29-23
  done: false
  episode_len_mean: 442.66
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.42659999999995
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 503
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.629946490128835
          entropy_coeff: 0.009999999999999998
          kl: 0.010196206794061248
          policy_loss: -0.09289681216080983
          total_loss: -0.08772679070631663
          vf_explained_var: 0.19400416314601898
          vf_loss: 0.011145830475854584
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,193,4940.51,193000,-4.4266,-3.33,-6.4,442.66


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-21_20-29-42
  done: false
  episode_len_mean: 444.52
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.445199999999949
  episode_reward_min: -6.399999999999908
  episodes_this_iter: 2
  episodes_total: 505
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.7393096605936686
          entropy_coeff: 0.009999999999999998
          kl: 0.006196450016658951
          policy_loss: -0.027445056041081748
          total_loss: -0.03180226153797573
          vf_explained_var: 0.6765769720077515
          vf_loss: 0.006761982020301123
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,194,4959.18,194000,-4.4452,-3.33,-6.4,444.52


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-21_20-30-03
  done: false
  episode_len_mean: 446.56
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.46559999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 507
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.6499567243787978
          entropy_coeff: 0.009999999999999998
          kl: 0.010043550065101749
          policy_loss: 0.004947047432263692
          total_loss: 0.007168122132619222
          vf_explained_var: -0.09100385010242462
          vf_loss: 0.008551543789669975
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,195,4979.83,195000,-4.4656,-3.33,-6.65,446.56




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-21_20-30-45
  done: false
  episode_len_mean: 445.45
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.45449999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 510
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4843126349978977
          entropy_coeff: 0.009999999999999998
          kl: 0.003845163703738441
          policy_loss: 0.024107761846648323
          total_loss: 0.02307248819205496
          vf_explained_var: 0.0038977814838290215
          vf_loss: 0.009914622149113306
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,196,5021.8,196000,-4.4545,-3.33,-6.65,445.45


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-21_20-31-11
  done: false
  episode_len_mean: 444.96
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.44959999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 512
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5505821466445924
          entropy_coeff: 0.009999999999999998
          kl: 0.007218995445658896
          policy_loss: 0.09378169336252742
          total_loss: 0.08617317668265767
          vf_explained_var: 0.49834001064300537
          vf_loss: 0.004242687644889682
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,197,5047.95,197000,-4.4496,-3.33,-6.65,444.96


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-21_20-31-33
  done: false
  episode_len_mean: 444.48
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.44479999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 514
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.621308802233802
          entropy_coeff: 0.009999999999999998
          kl: 0.0127223796662461
          policy_loss: -0.10432641518612702
          total_loss: -0.10810279966228538
          vf_explained_var: 0.765541672706604
          vf_loss: 0.005995996253720174
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,198,5069.91,198000,-4.4448,-3.33,-6.65,444.48


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-21_20-31-52
  done: false
  episode_len_mean: 446.49
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.46489999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 516
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.62907911406623
          entropy_coeff: 0.009999999999999998
          kl: 0.01399192421927285
          policy_loss: 0.07538899199830161
          total_loss: 0.07359715956780645
          vf_explained_var: -0.20689184963703156
          vf_loss: 0.007415543959036263
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,199,5088.38,199000,-4.4649,-3.33,-6.65,446.49


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-21_20-32-12
  done: false
  episode_len_mean: 447.37
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.4736999999999485
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 518
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8021632075309753
          entropy_coeff: 0.009999999999999998
          kl: 0.012871983033957809
          policy_loss: -0.05880159835020701
          total_loss: -0.06459464124507375
          vf_explained_var: 0.6725721955299377
          vf_loss: 0.005712148006488052
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,200,5108.43,200000,-4.4737,-3.33,-6.65,447.37


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-21_20-32-34
  done: false
  episode_len_mean: 447.39
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.473899999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 520
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7119348539246453
          entropy_coeff: 0.009999999999999998
          kl: 0.009183729215483697
          policy_loss: -0.09937776252627373
          total_loss: -0.1050605457690027
          vf_explained_var: 0.6461626291275024
          vf_loss: 0.006787301732563518
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,201,5131.06,201000,-4.4739,-3.33,-6.65,447.39


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-21_20-32-57
  done: false
  episode_len_mean: 447.21
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.47209999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 522
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5745886034435697
          entropy_coeff: 0.009999999999999998
          kl: 0.008917796523873239
          policy_loss: -0.09043490721119775
          total_loss: -0.09374486340416802
          vf_explained_var: 0.7086960077285767
          vf_loss: 0.0079212926612753
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,202,5153.54,202000,-4.4721,-3.33,-6.65,447.21


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-21_20-33-19
  done: false
  episode_len_mean: 446.97
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.469699999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 525
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5775938802295262
          entropy_coeff: 0.009999999999999998
          kl: 0.007976523396710355
          policy_loss: 0.034346793757544625
          total_loss: 0.027749809126059215
          vf_explained_var: 0.7830249071121216
          vf_loss: 0.0051408378180996
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,203,5176.03,203000,-4.4697,-3.33,-6.65,446.97


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-21_20-33-40
  done: false
  episode_len_mean: 448.36
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.4835999999999485
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 527
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6617190029886033
          entropy_coeff: 0.009999999999999998
          kl: 0.00824456843331076
          policy_loss: 0.04626018504301707
          total_loss: 0.04138828499449624
          vf_explained_var: 0.035145826637744904
          vf_loss: 0.007571477005452228
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,204,5196.84,204000,-4.4836,-3.33,-6.65,448.36


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-21_20-34-02
  done: false
  episode_len_mean: 449.37
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.493699999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 529
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7395236757066515
          entropy_coeff: 0.009999999999999998
          kl: 0.013017999031461022
          policy_loss: 0.10871850666072634
          total_loss: 0.10003906480140157
          vf_explained_var: 0.7911511659622192
          vf_loss: 0.002125430443104253
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,205,5219.17,205000,-4.4937,-3.33,-6.65,449.37


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-21_20-34-27
  done: false
  episode_len_mean: 448.37
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.483699999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 531
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.515536708301968
          entropy_coeff: 0.009999999999999998
          kl: 0.013288623776615636
          policy_loss: -0.1283436647719807
          total_loss: -0.12661019828584458
          vf_explained_var: -0.028382182121276855
          vf_loss: 0.010161468741070065
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,206,5243.82,206000,-4.4837,-3.33,-6.65,448.37


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-21_20-34-50
  done: false
  episode_len_mean: 447.95
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.479499999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 534
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6964053617583381
          entropy_coeff: 0.009999999999999998
          kl: 0.008628872431728456
          policy_loss: -0.023756872945361666
          total_loss: -0.026416250897778406
          vf_explained_var: 0.36219409108161926
          vf_loss: 0.009936311467835265
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,207,5266.26,207000,-4.4795,-3.33,-6.65,447.95


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-21_20-35-12
  done: false
  episode_len_mean: 446.59
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.465899999999951
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 536
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6918159272935656
          entropy_coeff: 0.009999999999999998
          kl: 0.008583499380748292
          policy_loss: 0.11221294452746709
          total_loss: 0.10343473181128501
          vf_explained_var: 0.16903886198997498
          vf_loss: 0.003794550358563558
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,208,5288.81,208000,-4.4659,-3.33,-6.65,446.59


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-21_20-35-36
  done: false
  episode_len_mean: 447.44
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.474399999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 538
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7530821363131206
          entropy_coeff: 0.009999999999999998
          kl: 0.01081943989793367
          policy_loss: -0.0807313397526741
          total_loss: -0.08440955012208885
          vf_explained_var: 0.7590985894203186
          vf_loss: 0.00837527122736598
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,209,5312.29,209000,-4.4744,-3.33,-6.65,447.44




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-21_20-36-16
  done: false
  episode_len_mean: 446.43
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.464299999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 541
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8137421528498332
          entropy_coeff: 0.009999999999999998
          kl: 0.01289750578097109
          policy_loss: -0.05971008183227645
          total_loss: -0.06142771566907565
          vf_explained_var: 0.5965110659599304
          vf_loss: 0.009890424002272387
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,210,5352.5,210000,-4.4643,-3.33,-6.65,446.43


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-21_20-36-38
  done: false
  episode_len_mean: 445.4
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.45399999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 543
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7891202012697855
          entropy_coeff: 0.009999999999999998
          kl: 0.012640837815248437
          policy_loss: 0.07560515387190712
          total_loss: 0.06803659018543032
          vf_explained_var: 0.837950587272644
          vf_loss: 0.003923214333028429
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,211,5374.4,211000,-4.454,-3.33,-6.65,445.4


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-21_20-37-00
  done: false
  episode_len_mean: 444.2
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.4419999999999495
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 545
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9396327720748054
          entropy_coeff: 0.009999999999999998
          kl: 0.015055967504181408
          policy_loss: -0.03298876070313984
          total_loss: -0.03632350564002991
          vf_explained_var: 0.08660761266946793
          vf_loss: 0.008439500441050364
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,212,5396.81,212000,-4.442,-3.33,-6.65,444.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-21_20-37-25
  done: false
  episode_len_mean: 442.53
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.425299999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 548
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7275126443968878
          entropy_coeff: 0.009999999999999998
          kl: 0.012571929596397899
          policy_loss: -0.04150966927409172
          total_loss: -0.0413457410203086
          vf_explained_var: 0.25696444511413574
          vf_loss: 0.011074514004091422
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,213,5421.96,213000,-4.4253,-3.33,-6.65,442.53


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-21_20-37-50
  done: false
  episode_len_mean: 441.44
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.41439999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 550
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8544203559557597
          entropy_coeff: 0.009999999999999998
          kl: 0.014452280684854449
          policy_loss: -0.01774571670426263
          total_loss: -0.024496745566527048
          vf_explained_var: 0.5839664936065674
          vf_loss: 0.004476704104389582
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,214,5446.78,214000,-4.4144,-3.33,-6.65,441.44


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-21_20-38-15
  done: false
  episode_len_mean: 441.44
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.41439999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 552
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9022307091289097
          entropy_coeff: 0.009999999999999998
          kl: 0.011867710223689423
          policy_loss: -0.10461855679750443
          total_loss: -0.10889219972822402
          vf_explained_var: 0.2560470402240753
          vf_loss: 0.008740637703643491
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,215,5471.17,215000,-4.4144,-3.33,-6.65,441.44


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-21_20-38-40
  done: false
  episode_len_mean: 440.47
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.404699999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 555
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7327238506740994
          entropy_coeff: 0.009999999999999998
          kl: 0.0124359631828482
          policy_loss: 0.002392347902059555
          total_loss: 0.0017851812971962822
          vf_explained_var: 0.08986862003803253
          vf_loss: 0.010424362589967333
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,216,5496.51,216000,-4.4047,-3.33,-6.65,440.47


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-21_20-39-05
  done: false
  episode_len_mean: 440.6
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.40599999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 557
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7156095306078594
          entropy_coeff: 0.009999999999999998
          kl: 0.014045047788315973
          policy_loss: -0.17180969400538337
          total_loss: -0.17397526419825024
          vf_explained_var: 0.4564470648765564
          vf_loss: 0.00788021524300954
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,217,5521.76,217000,-4.406,-3.33,-6.65,440.6


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-21_20-39-27
  done: false
  episode_len_mean: 442.3
  episode_media: {}
  episode_reward_max: -3.329999999999973
  episode_reward_mean: -4.422999999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 560
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7645941880014209
          entropy_coeff: 0.009999999999999998
          kl: 0.018536625849793806
          policy_loss: -0.03134800692399343
          total_loss: -0.03480520769953728
          vf_explained_var: 0.6920276880264282
          vf_loss: 0.004804572332068347
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,218,5543.68,218000,-4.423,-3.33,-6.65,442.3


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-21_20-39-52
  done: false
  episode_len_mean: 442.02
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.42019999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 562
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7857899003558688
          entropy_coeff: 0.009999999999999998
          kl: 0.01866701933094785
          policy_loss: 0.03805352921287219
          total_loss: 0.03766315579414368
          vf_explained_var: -0.22747687995433807
          vf_loss: 0.008017338866678377
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,219,5568.83,219000,-4.4202,-3.14,-6.65,442.02


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-21_20-40-15
  done: false
  episode_len_mean: 442.45
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.4244999999999495
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 564
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8742796540260316
          entropy_coeff: 0.009999999999999998
          kl: 0.012701317605798578
          policy_loss: -0.15546003381411236
          total_loss: -0.15850243187612958
          vf_explained_var: -0.09149697422981262
          vf_loss: 0.009270360900296105
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,220,5591.33,220000,-4.4245,-3.14,-6.65,442.45


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-21_20-40-36
  done: false
  episode_len_mean: 444.2
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.4419999999999495
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 566
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8712700181537205
          entropy_coeff: 0.009999999999999998
          kl: 0.012441249195872937
          policy_loss: -0.15509439143869613
          total_loss: -0.15928689506318833
          vf_explained_var: 0.496136337518692
          vf_loss: 0.008221810744402723
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,221,5611.96,221000,-4.442,-3.14,-6.65,444.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-21_20-40-55
  done: false
  episode_len_mean: 446.79
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.467899999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 568
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8135874695248073
          entropy_coeff: 0.009999999999999998
          kl: 0.0100496148822056
          policy_loss: -0.11347367862860362
          total_loss: -0.11848569909731548
          vf_explained_var: -0.06047467142343521
          vf_loss: 0.008036231051664799
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,222,5631.76,222000,-4.4679,-3.14,-6.65,446.79




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-21_20-41-36
  done: false
  episode_len_mean: 448.08
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.480799999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 571
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7144882016711764
          entropy_coeff: 0.009999999999999998
          kl: 0.013068494735144418
          policy_loss: -0.03673382484250599
          total_loss: -0.03982201135820813
          vf_explained_var: 0.05175161361694336
          vf_loss: 0.007440769511999355
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,223,5672.1,223000,-4.4808,-3.14,-6.65,448.08


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-21_20-41-59
  done: false
  episode_len_mean: 450.08
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.500799999999948
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 573
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9781366189320881
          entropy_coeff: 0.009999999999999998
          kl: 0.013030120808040804
          policy_loss: 0.01788151040673256
          total_loss: 0.007872876359356774
          vf_explained_var: 0.6841318011283875
          vf_loss: 0.00317623153484116
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,224,5695.24,224000,-4.5008,-3.14,-6.65,450.08


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-21_20-42-22
  done: false
  episode_len_mean: 451.54
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.515399999999948
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 575
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7631278620825874
          entropy_coeff: 0.009999999999999998
          kl: 0.013490544302854415
          policy_loss: 0.07130702129668659
          total_loss: 0.06551669811209043
          vf_explained_var: 0.05849691852927208
          vf_loss: 0.005011364981894278
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,225,5718.72,225000,-4.5154,-3.14,-6.65,451.54


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-21_20-42-45
  done: false
  episode_len_mean: 453.73
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.537299999999947
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 577
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0164630744192333
          entropy_coeff: 0.009999999999999998
          kl: 0.013261216022098152
          policy_loss: -0.0037578899413347245
          total_loss: -0.010368082630965446
          vf_explained_var: -0.0790354534983635
          vf_loss: 0.006840948797374343
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,226,5741.08,226000,-4.5373,-3.14,-6.65,453.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-21_20-43-13
  done: false
  episode_len_mean: 451.36
  episode_media: {}
  episode_reward_max: -2.8399999999999834
  episode_reward_mean: -4.513599999999947
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 580
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7437911417749192
          entropy_coeff: 0.009999999999999998
          kl: 0.012902288421272601
          policy_loss: -0.02638280259238349
          total_loss: -0.0314079847600725
          vf_explained_var: 0.5805396437644958
          vf_loss: 0.005880951885289202
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,227,5769.56,227000,-4.5136,-2.84,-6.65,451.36


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-21_20-43-39
  done: false
  episode_len_mean: 450.87
  episode_media: {}
  episode_reward_max: -2.8399999999999834
  episode_reward_mean: -4.508699999999949
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 2
  episodes_total: 582
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6932110124164157
          entropy_coeff: 0.009999999999999998
          kl: 0.014210678163036958
          policy_loss: 0.05074218478467729
          total_loss: 0.046736684110429555
          vf_explained_var: 0.6091158986091614
          vf_loss: 0.005732454337541841
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,228,5795.03,228000,-4.5087,-2.84,-6.65,450.87


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-21_20-44-11
  done: false
  episode_len_mean: 444.47
  episode_media: {}
  episode_reward_max: -2.8299999999999836
  episode_reward_mean: -4.4446999999999495
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 585
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2858545899391174
          entropy_coeff: 0.009999999999999998
          kl: 0.004439651183765056
          policy_loss: -0.12429724824097421
          total_loss: -0.12203420003255208
          vf_explained_var: 0.2402952015399933
          vf_loss: 0.012874023047172362
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,229,5827.38,229000,-4.4447,-2.83,-6.65,444.47


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-21_20-44-36
  done: false
  episode_len_mean: 443.34
  episode_media: {}
  episode_reward_max: -2.8299999999999836
  episode_reward_mean: -4.43339999999995
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 588
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 2.0701202591260275
          entropy_coeff: 0.009999999999999998
          kl: 0.017167470764504443
          policy_loss: 0.020663808700111176
          total_loss: 0.008709804796510272
          vf_explained_var: 0.6705447435379028
          vf_loss: 0.004401680565853086
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,230,5852.19,230000,-4.4334,-2.83,-6.65,443.34


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-21_20-45-08
  done: false
  episode_len_mean: 437.83
  episode_media: {}
  episode_reward_max: -2.7499999999999853
  episode_reward_mean: -4.378299999999951
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 591
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.288594122727712
          entropy_coeff: 0.009999999999999998
          kl: 0.028840573065503663
          policy_loss: 0.0820071113606294
          total_loss: 0.08145001447863048
          vf_explained_var: 0.8516587018966675
          vf_loss: 0.005028572585433722
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,231,5884.43,231000,-4.3783,-2.75,-6.65,437.83


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-21_20-45-34
  done: false
  episode_len_mean: 436.7
  episode_media: {}
  episode_reward_max: -2.7499999999999853
  episode_reward_mean: -4.366999999999951
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 594
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.4047291994094848
          entropy_coeff: 0.009999999999999998
          kl: 0.014704786877606029
          policy_loss: 0.0035385769688420824
          total_loss: 0.0014910714907778633
          vf_explained_var: 0.6843506693840027
          vf_loss: 0.006416563588815431
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,232,5910.21,232000,-4.367,-2.75,-6.65,436.7


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-21_20-46-06
  done: false
  episode_len_mean: 431.41
  episode_media: {}
  episode_reward_max: -2.7499999999999853
  episode_reward_mean: -4.314099999999952
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 597
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2656516194343568
          entropy_coeff: 0.009999999999999998
          kl: 0.005085503761990578
          policy_loss: -0.08094472115238507
          total_loss: -0.084324761480093
          vf_explained_var: 0.4986784756183624
          vf_loss: 0.007345572186426984
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 23300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,233,5942.05,233000,-4.3141,-2.75,-6.65,431.41




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-21_20-46-53
  done: false
  episode_len_mean: 423.28
  episode_media: {}
  episode_reward_max: -2.7499999999999853
  episode_reward_mean: -4.232799999999954
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 4
  episodes_total: 601
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2800638927353754
          entropy_coeff: 0.009999999999999998
          kl: 0.015634307738721927
          policy_loss: -0.021263501793146133
          total_loss: -0.020325018879440097
          vf_explained_var: 0.4807233214378357
          vf_loss: 0.00780297103855345
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,234,5989.5,234000,-4.2328,-2.75,-6.65,423.28


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-21_20-47-26
  done: false
  episode_len_mean: 416.5
  episode_media: {}
  episode_reward_max: -2.7499999999999853
  episode_reward_mean: -4.164999999999955
  episode_reward_min: -6.649999999999903
  episodes_this_iter: 3
  episodes_total: 604
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.469964443312751
          entropy_coeff: 0.009999999999999998
          kl: 0.018092306352477768
          policy_loss: 0.023159708537989192
          total_loss: 0.02371932069460551
          vf_explained_var: 0.2799548804759979
          vf_loss: 0.008389836715327368
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,235,6021.64,235000,-4.165,-2.75,-6.65,416.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-21_20-47-59
  done: false
  episode_len_mean: 406.7
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -4.0669999999999575
  episode_reward_min: -6.009999999999916
  episodes_this_iter: 4
  episodes_total: 608
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3043718099594117
          entropy_coeff: 0.009999999999999998
          kl: 0.007256337161149428
          policy_loss: 0.015106704375810093
          total_loss: 0.016830618017249638
          vf_explained_var: 0.09907380491495132
          vf_loss: 0.01201248982300361
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 2360

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,236,6055.19,236000,-4.067,-2.72,-6.01,406.7


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-21_20-48-35
  done: false
  episode_len_mean: 403.3
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -4.032999999999958
  episode_reward_min: -6.009999999999916
  episodes_this_iter: 3
  episodes_total: 611
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3406651496887207
          entropy_coeff: 0.009999999999999998
          kl: 0.004945689046754885
          policy_loss: -0.09768299733599027
          total_loss: -0.09749595026175181
          vf_explained_var: 0.17928609251976013
          vf_loss: 0.011715884517050452
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 2370

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,237,6091.04,237000,-4.033,-2.62,-6.01,403.3


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-21_20-49-10
  done: false
  episode_len_mean: 395.58
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.95579999999996
  episode_reward_min: -5.869999999999919
  episodes_this_iter: 4
  episodes_total: 615
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2351368519994947
          entropy_coeff: 0.009999999999999998
          kl: 0.008429021271791548
          policy_loss: -0.020013201236724853
          total_loss: -0.01784528460767534
          vf_explained_var: 0.14275266230106354
          vf_loss: 0.012919086124747991
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,238,6126.09,238000,-3.9558,-2.59,-5.87,395.58


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-21_20-49-42
  done: false
  episode_len_mean: 386.57
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.8656999999999613
  episode_reward_min: -5.749999999999922
  episodes_this_iter: 4
  episodes_total: 619
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2639268543985156
          entropy_coeff: 0.009999999999999998
          kl: 0.00808959299627428
          policy_loss: 0.02422562970055474
          total_loss: 0.025489246514108447
          vf_explained_var: 0.1667165607213974
          vf_loss: 0.012367124845170312
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 23900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,239,6158.24,239000,-3.8657,-2.58,-5.75,386.57


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-21_20-50-18
  done: false
  episode_len_mean: 378.98
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.7897999999999628
  episode_reward_min: -5.749999999999922
  episodes_this_iter: 4
  episodes_total: 623
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2484918607605828
          entropy_coeff: 0.009999999999999998
          kl: 0.006619578937074758
          policy_loss: 0.017127209777633347
          total_loss: 0.019131553421417872
          vf_explained_var: 0.19227854907512665
          vf_loss: 0.013232575967493984
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 24

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,240,6193.57,240000,-3.7898,-2.58,-5.75,378.98


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-21_20-50-53
  done: false
  episode_len_mean: 370.23
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.7022999999999655
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 4
  episodes_total: 627
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2480980224079556
          entropy_coeff: 0.009999999999999998
          kl: 0.01305883787756754
          policy_loss: 0.0025432618955771127
          total_loss: 0.0056574713852670456
          vf_explained_var: 0.23598796129226685
          vf_loss: 0.013116052374243736
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,241,6229,241000,-3.7023,-2.55,-5.61,370.23




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-21_20-51-47
  done: false
  episode_len_mean: 364.86
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.648599999999966
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 3
  episodes_total: 630
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.208608631292979
          entropy_coeff: 0.009999999999999998
          kl: 0.009419861377853422
          policy_loss: -0.1467158599032296
          total_loss: -0.1500170444448789
          vf_explained_var: 0.5896352529525757
          vf_loss: 0.006996600350572003
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,242,6282.93,242000,-3.6486,-2.49,-5.61,364.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-21_20-52-21
  done: false
  episode_len_mean: 358.71
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.5870999999999675
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 4
  episodes_total: 634
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2067522989379036
          entropy_coeff: 0.009999999999999998
          kl: 0.016716233924910028
          policy_loss: -0.04417988417877091
          total_loss: -0.041997621705134706
          vf_explained_var: 0.38775724172592163
          vf_loss: 0.011076311173383147
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,243,6316.41,243000,-3.5871,-2.49,-5.61,358.71


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-21_20-52-56
  done: false
  episode_len_mean: 352.09
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.520899999999969
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 4
  episodes_total: 638
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2359293129709032
          entropy_coeff: 0.009999999999999998
          kl: 0.0062650829070183215
          policy_loss: -0.015353414333528943
          total_loss: -0.01422071937057707
          vf_explained_var: 0.2528209984302521
          vf_loss: 0.012302601606481604
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 24

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,244,6351.47,244000,-3.5209,-2.49,-5.61,352.09


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-21_20-53-30
  done: false
  episode_len_mean: 347.7
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.4769999999999697
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 3
  episodes_total: 641
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.096436991956499
          entropy_coeff: 0.009999999999999998
          kl: 0.017568515275588857
          policy_loss: -0.07955612316727638
          total_loss: -0.07986336110366715
          vf_explained_var: 0.68871009349823
          vf_loss: 0.007321860594674945
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,245,6386.01,245000,-3.477,-2.49,-5.61,347.7


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-21_20-54-04
  done: false
  episode_len_mean: 340.43
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.4042999999999717
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 4
  episodes_total: 645
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.220313963625166
          entropy_coeff: 0.009999999999999998
          kl: 0.04503033069769842
          policy_loss: 0.031043880350059932
          total_loss: 0.0357806823319859
          vf_explained_var: 0.7050850987434387
          vf_loss: 0.008391215238306257
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,246,6419.4,246000,-3.4043,-2.49,-5.61,340.43


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-21_20-54-35
  done: false
  episode_len_mean: 337.15
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.3714999999999717
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 3
  episodes_total: 648
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2171455899874368
          entropy_coeff: 0.009999999999999998
          kl: 0.017294287984130623
          policy_loss: -0.08998554237186909
          total_loss: -0.0865792820437087
          vf_explained_var: 0.3892359733581543
          vf_loss: 0.010652895168297821
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 2470

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,247,6451.05,247000,-3.3715,-2.49,-5.61,337.15


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-21_20-55-03
  done: false
  episode_len_mean: 336.38
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.363799999999972
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 3
  episodes_total: 651
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5000933435228136
          entropy_coeff: 0.009999999999999998
          kl: 0.03437974316321504
          policy_loss: -0.05864810666276349
          total_loss: -0.057822983298036784
          vf_explained_var: 0.5941292643547058
          vf_loss: 0.006035889315211938
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 2480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,248,6478.37,248000,-3.3638,-2.49,-5.61,336.38


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-21_20-55-33
  done: false
  episode_len_mean: 332.88
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.3287999999999727
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 3
  episodes_total: 654
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.4160549667146471
          entropy_coeff: 0.009999999999999998
          kl: 0.014877600932297301
          policy_loss: -0.14525322222875225
          total_loss: -0.14921402157180838
          vf_explained_var: 0.8074633479118347
          vf_loss: 0.0038448064445724918
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,249,6508.58,249000,-3.3288,-2.49,-5.61,332.88


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-21_20-56-04
  done: false
  episode_len_mean: 328.7
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.286999999999974
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 4
  episodes_total: 658
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1526883032586839
          entropy_coeff: 0.009999999999999998
          kl: 0.023332908584468213
          policy_loss: 0.036353026330471036
          total_loss: 0.03994199534257253
          vf_explained_var: 0.8905958533287048
          vf_loss: 0.005149238750648996
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,250,6539.55,250000,-3.287,-2.49,-5.61,328.7




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-21_20-56-52
  done: false
  episode_len_mean: 325.64
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.2563999999999744
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 3
  episodes_total: 661
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.620264462629954
          entropy_coeff: 0.009999999999999998
          kl: 0.01534862649478374
          policy_loss: -0.04499028639660941
          total_loss: -0.04407005144490136
          vf_explained_var: 0.10569985955953598
          vf_loss: 0.007288670027628541
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 2510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,251,6588.16,251000,-3.2564,-2.49,-5.61,325.64


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-21_20-57-22
  done: false
  episode_len_mean: 322.33
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.223299999999975
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 3
  episodes_total: 664
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5956423388587104
          entropy_coeff: 0.009999999999999998
          kl: 0.01170974797741289
          policy_loss: 0.0971369120809767
          total_loss: 0.09224469653434224
          vf_explained_var: 0.7442124485969543
          vf_loss: 0.0035615036910813715
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,252,6617.85,252000,-3.2233,-2.49,-5.61,322.33


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-21_20-57-53
  done: false
  episode_len_mean: 316.68
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1667999999999767
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 3
  episodes_total: 667
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.516927018430498
          entropy_coeff: 0.009999999999999998
          kl: 0.019756342209348632
          policy_loss: 0.01201222472720676
          total_loss: 0.01376683231857088
          vf_explained_var: 0.8382337093353271
          vf_loss: 0.004265541729465541
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,253,6649.03,253000,-3.1668,-2.49,-5.61,316.68


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-21_20-58-23
  done: false
  episode_len_mean: 311.79
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.117899999999978
  episode_reward_min: -5.609999999999925
  episodes_this_iter: 3
  episodes_total: 670
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3576888190375433
          entropy_coeff: 0.009999999999999998
          kl: 0.013373278789402813
          policy_loss: -0.0793048806488514
          total_loss: -0.07689162641763687
          vf_explained_var: 0.5994133353233337
          vf_loss: 0.00742158032177637
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,254,6678.24,254000,-3.1179,-2.49,-5.61,311.79


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-21_20-58-54
  done: false
  episode_len_mean: 304.26
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.04259999999998
  episode_reward_min: -5.34999999999993
  episodes_this_iter: 4
  episodes_total: 674
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.518036499288347
          entropy_coeff: 0.009999999999999998
          kl: 0.018711977017854528
          policy_loss: -0.0018432569172647263
          total_loss: -0.0009490220083130731
          vf_explained_var: 0.8307228684425354
          vf_loss: 0.0040854101625478105
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 25

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,255,6709.55,255000,-3.0426,-2.49,-5.35,304.26


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-21_20-59-26
  done: false
  episode_len_mean: 299.65
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.996499999999981
  episode_reward_min: -5.34999999999993
  episodes_this_iter: 3
  episodes_total: 677
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4496623026000128
          entropy_coeff: 0.009999999999999998
          kl: 0.012031641108977167
          policy_loss: 0.016635604202747345
          total_loss: 0.014195837577184041
          vf_explained_var: 0.44905099272727966
          vf_loss: 0.004347908024727884
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 2560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,256,6741.94,256000,-2.9965,-2.49,-5.35,299.65


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-21_20-59-59
  done: false
  episode_len_mean: 298.1
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.9809999999999803
  episode_reward_min: -5.34999999999993
  episodes_this_iter: 3
  episodes_total: 680
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4198435650931465
          entropy_coeff: 0.009999999999999998
          kl: 0.010897462578521457
          policy_loss: 0.03290600727001826
          total_loss: 0.027602917204300564
          vf_explained_var: 0.9324609637260437
          vf_loss: 0.0019130947889708396
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 25700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,257,6774.21,257000,-2.981,-2.49,-5.35,298.1


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-21_21-00-30
  done: false
  episode_len_mean: 296.25
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.96249999999998
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 3
  episodes_total: 683
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4665502044889662
          entropy_coeff: 0.009999999999999998
          kl: 0.016956391955995482
          policy_loss: -0.06502187351385752
          total_loss: -0.0638735079103046
          vf_explained_var: 0.8273932337760925
          vf_loss: 0.004949526152470045
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 25800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,258,6805.77,258000,-2.9625,-2.49,-4.92,296.25


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-21_21-01-01
  done: false
  episode_len_mean: 294.65
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.9464999999999817
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 4
  episodes_total: 687
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3614774081442091
          entropy_coeff: 0.009999999999999998
          kl: 0.01148765974175012
          policy_loss: 0.04799014702439308
          total_loss: 0.046777073459492786
          vf_explained_var: 0.8340224623680115
          vf_loss: 0.005041295352081458
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 25900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,259,6836.54,259000,-2.9465,-2.49,-4.27,294.65




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-21_21-01-47
  done: false
  episode_len_mean: 295.11
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.9510999999999807
  episode_reward_min: -4.269999999999953
  episodes_this_iter: 3
  episodes_total: 690
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.585444864961836
          entropy_coeff: 0.009999999999999998
          kl: 0.014631125494773557
          policy_loss: 0.019879067440827687
          total_loss: 0.01625623471207089
          vf_explained_var: 0.8664856553077698
          vf_loss: 0.0028571230456388244
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 2600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,260,6882.39,260000,-2.9511,-2.49,-4.27,295.11


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-21_21-02-17
  done: false
  episode_len_mean: 294.87
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.9486999999999806
  episode_reward_min: -4.129999999999956
  episodes_this_iter: 3
  episodes_total: 693
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5527704450819226
          entropy_coeff: 0.009999999999999998
          kl: 0.012656742950093185
          policy_loss: -0.01909328384531869
          total_loss: -0.020198637578222486
          vf_explained_var: 0.6298106908798218
          vf_loss: 0.006312888585186253
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,261,6912.59,261000,-2.9487,-2.49,-4.13,294.87


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-21_21-02-45
  done: false
  episode_len_mean: 295.54
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.9553999999999796
  episode_reward_min: -4.129999999999956
  episodes_this_iter: 3
  episodes_total: 696
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7321207549836901
          entropy_coeff: 0.009999999999999998
          kl: 0.013613820615855914
          policy_loss: -0.05470422067575985
          total_loss: -0.0582941607468658
          vf_explained_var: 0.6804731488227844
          vf_loss: 0.0050085841264161796
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,262,6940.03,262000,-2.9554,-2.49,-4.13,295.54


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-21_21-03-12
  done: false
  episode_len_mean: 296.92
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.96919999999998
  episode_reward_min: -4.129999999999956
  episodes_this_iter: 2
  episodes_total: 698
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6577457600169712
          entropy_coeff: 0.009999999999999998
          kl: 0.016588888502848266
          policy_loss: -0.1494906249973509
          total_loss: -0.1496820648511251
          vf_explained_var: 0.7021847367286682
          vf_loss: 0.005757142305891547
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,263,6966.96,263000,-2.9692,-2.49,-4.13,296.92


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-21_21-03-41
  done: false
  episode_len_mean: 298.05
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.9804999999999806
  episode_reward_min: -4.129999999999956
  episodes_this_iter: 3
  episodes_total: 701
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.469935933748881
          entropy_coeff: 0.009999999999999998
          kl: 0.01249789187087161
          policy_loss: -0.08062229951222738
          total_loss: -0.07748456762896644
          vf_explained_var: 0.524699330329895
          vf_loss: 0.00982940665109911
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,264,6996.13,264000,-2.9805,-2.49,-4.13,298.05


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-21_21-04-11
  done: false
  episode_len_mean: 299.66
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.9965999999999804
  episode_reward_min: -4.129999999999956
  episodes_this_iter: 3
  episodes_total: 704
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7730783740679423
          entropy_coeff: 0.009999999999999998
          kl: 0.012848769880911614
          policy_loss: -0.020967580046918656
          total_loss: -0.022695865896013048
          vf_explained_var: 0.523371696472168
          vf_loss: 0.007769994979672548
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,265,7025.93,265000,-2.9966,-2.49,-4.13,299.66


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-21_21-04-40
  done: false
  episode_len_mean: 301.78
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.0177999999999794
  episode_reward_min: -4.129999999999956
  episodes_this_iter: 3
  episodes_total: 707
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9906999150911966
          entropy_coeff: 0.009999999999999998
          kl: 0.013247831150755424
          policy_loss: 0.015062071879704793
          total_loss: 0.010196372204356723
          vf_explained_var: 0.6057475805282593
          vf_loss: 0.006553115183487534
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,266,7055.4,266000,-3.0178,-2.49,-4.13,301.78


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-21_21-05-09
  done: false
  episode_len_mean: 303.41
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.034099999999979
  episode_reward_min: -4.129999999999956
  episodes_this_iter: 3
  episodes_total: 710
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7463588171535067
          entropy_coeff: 0.009999999999999998
          kl: 0.009540372585737146
          policy_loss: -0.044433846076329546
          total_loss: -0.04725041331516372
          vf_explained_var: 0.14236782491207123
          vf_loss: 0.008534285658970475
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,267,7084.46,267000,-3.0341,-2.49,-4.13,303.41


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-21_21-05-37
  done: false
  episode_len_mean: 306.05
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.0604999999999785
  episode_reward_min: -4.129999999999956
  episodes_this_iter: 3
  episodes_total: 713
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8357946621047125
          entropy_coeff: 0.009999999999999998
          kl: 0.013435178105546402
          policy_loss: 0.035626137256622316
          total_loss: 0.030522248645623525
          vf_explained_var: 0.6005477905273438
          vf_loss: 0.004645833168696198
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,268,7111.98,268000,-3.0605,-2.49,-4.13,306.05


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-21_21-06-04
  done: false
  episode_len_mean: 308.73
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1266999999999774
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 2
  episodes_total: 715
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8541823705037435
          entropy_coeff: 0.009999999999999998
          kl: 0.010822248218461814
          policy_loss: -0.011483118848668204
          total_loss: 0.043032312724325394
          vf_explained_var: 0.03686777129769325
          vf_loss: 0.06612319498219424
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,269,7139.68,269000,-3.1267,-2.49,-7.77,308.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-21_21-06-32
  done: false
  episode_len_mean: 311.31
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.152499999999977
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 718
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.467098335425059
          entropy_coeff: 0.009999999999999998
          kl: 0.012135065544775376
          policy_loss: -0.14865387909942204
          total_loss: -0.14117394089698793
          vf_explained_var: 0.32881680130958557
          vf_loss: 0.014375711211727725
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 2700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,270,7167.45,270000,-3.1525,-2.49,-7.77,311.31




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-21_21-07-17
  done: false
  episode_len_mean: 313.8
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1771999999999765
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 721
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.796246752474043
          entropy_coeff: 0.009999999999999998
          kl: 0.01463260618769244
          policy_loss: -0.1400898616347048
          total_loss: -0.136786651197407
          vf_explained_var: 0.6711161136627197
          vf_loss: 0.011890235106046828
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,271,7212.41,271000,-3.1772,-2.49,-7.77,313.8


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-21_21-07-46
  done: false
  episode_len_mean: 316.67
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.2349999999999763
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 724
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8123409933514065
          entropy_coeff: 0.009999999999999998
          kl: 0.009573542592819074
          policy_loss: -0.04034592600332366
          total_loss: -0.02681163822611173
          vf_explained_var: 0.7057963609695435
          vf_loss: 0.025523709210877616
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,272,7240.93,272000,-3.235,-2.49,-7.77,316.67


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-21_21-08-15
  done: false
  episode_len_mean: 318.85
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.2567999999999757
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 727
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.565128129058414
          entropy_coeff: 0.009999999999999998
          kl: 0.013661257458955746
          policy_loss: 0.008251845836639404
          total_loss: 0.009514699793524213
          vf_explained_var: 0.5824495553970337
          vf_loss: 0.008161058760662046
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 2730

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,273,7270.02,273000,-3.2568,-2.49,-7.77,318.85


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-21_21-08-48
  done: false
  episode_len_mean: 321.2
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.280299999999975
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 730
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4725760592354669
          entropy_coeff: 0.009999999999999998
          kl: 0.010059884743299028
          policy_loss: -0.012086815138657888
          total_loss: -0.009767567697498534
          vf_explained_var: 0.1474376767873764
          vf_loss: 0.010599412616445786
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,274,7303.09,274000,-3.2803,-2.57,-7.77,321.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-21_21-09-19
  done: false
  episode_len_mean: 322.28
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.2910999999999744
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 733
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3883892112308078
          entropy_coeff: 0.009999999999999998
          kl: 0.00805689490648456
          policy_loss: 0.008709041277567546
          total_loss: 0.008752305308977764
          vf_explained_var: 0.3578372895717621
          vf_loss: 0.008764921899677978
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 2750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,275,7333.85,275000,-3.2911,-2.57,-7.77,322.28


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-21_21-09-48
  done: false
  episode_len_mean: 324.44
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.3126999999999747
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 736
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4157610045539009
          entropy_coeff: 0.009999999999999998
          kl: 0.008747492581847007
          policy_loss: -0.014920526122053464
          total_loss: -0.01664260799686114
          vf_explained_var: 0.09726561605930328
          vf_loss: 0.006830812047701329
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,276,7363.26,276000,-3.3127,-2.57,-7.77,324.44


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-21_21-10-16
  done: false
  episode_len_mean: 327.05
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.338799999999975
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 739
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.850521116786533
          entropy_coeff: 0.009999999999999998
          kl: 0.01628001660770965
          policy_loss: 0.02481877696182993
          total_loss: 0.02554545799891154
          vf_explained_var: 0.5993639826774597
          vf_loss: 0.008800917061873609
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,277,7391.05,277000,-3.3388,-2.57,-7.77,327.05


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-21_21-10-48
  done: false
  episode_len_mean: 328.16
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.3498999999999732
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 742
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.373229710261027
          entropy_coeff: 0.009999999999999998
          kl: 0.007930608962792057
          policy_loss: 0.01615444463160303
          total_loss: 0.014580546650621626
          vf_explained_var: 0.5721988081932068
          vf_loss: 0.007077080217034866
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 27800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,278,7423.5,278000,-3.3499,-2.57,-7.77,328.16


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-21_21-11-19
  done: false
  episode_len_mean: 329.44
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -3.362699999999973
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 745
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3692772375212776
          entropy_coeff: 0.009999999999999998
          kl: 0.012191516055602373
          policy_loss: -0.12205986086693076
          total_loss: -0.12241309214797284
          vf_explained_var: 0.7226249575614929
          vf_loss: 0.00552816156996414
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 27900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,279,7453.64,279000,-3.3627,-2.68,-7.77,329.44


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-21_21-11-44
  done: false
  episode_len_mean: 331.51
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -3.3833999999999738
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 748
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.948102241092258
          entropy_coeff: 0.009999999999999998
          kl: 0.007833891337425344
          policy_loss: 0.018256872726811302
          total_loss: 0.013264289498329163
          vf_explained_var: -0.0873676985502243
          vf_loss: 0.009469087718429768
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,280,7479.37,280000,-3.3834,-2.68,-7.77,331.51




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-21_21-12-31
  done: false
  episode_len_mean: 330.78
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -3.376099999999974
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 751
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.578714538945092
          entropy_coeff: 0.009999999999999998
          kl: 0.014365917875733509
          policy_loss: -0.010263150102562374
          total_loss: -0.01270482838153839
          vf_explained_var: 0.409286767244339
          vf_loss: 0.004140900230009316
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 28100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,281,7526.45,281000,-3.3761,-2.68,-7.77,330.78


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-21_21-12-59
  done: false
  episode_len_mean: 333.08
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -3.399099999999973
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 2
  episodes_total: 753
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9421794162856207
          entropy_coeff: 0.009999999999999998
          kl: 0.008995474225817625
          policy_loss: -0.12896346069044537
          total_loss: -0.13373240364922417
          vf_explained_var: -0.015845773741602898
          vf_loss: 0.008889243377941764
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,282,7553.64,282000,-3.3991,-2.68,-7.77,333.08


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-21_21-13-35
  done: false
  episode_len_mean: 332.51
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.3933999999999735
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 757
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1454590479532878
          entropy_coeff: 0.009999999999999998
          kl: 0.005224501048995044
          policy_loss: 0.017067270891533957
          total_loss: 0.019233280917008717
          vf_explained_var: 0.3753167986869812
          vf_loss: 0.010273143649101257
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 28

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,283,7589.56,283000,-3.3934,-2.61,-7.77,332.51


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-21_21-14-07
  done: false
  episode_len_mean: 332.6
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.394299999999973
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 760
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6066556493441264
          entropy_coeff: 0.009999999999999998
          kl: 0.006275431287740081
          policy_loss: -0.06345277693536547
          total_loss: -0.0705740244852172
          vf_explained_var: 0.05929991975426674
          vf_loss: 0.00492449689643561
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 28400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,284,7621.91,284000,-3.3943,-2.61,-7.77,332.6


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-21_21-14-40
  done: false
  episode_len_mean: 330.7
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.375299999999973
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 764
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1637053728103637
          entropy_coeff: 0.009999999999999998
          kl: 0.00819273623897973
          policy_loss: -0.07917409266034763
          total_loss: -0.07325171712372038
          vf_explained_var: 0.3362971544265747
          vf_loss: 0.012310155708756711
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 28500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,285,7654.53,285000,-3.3753,-2.5,-7.77,330.7


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-21_21-15-17
  done: false
  episode_len_mean: 328.5
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.3532999999999737
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 768
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.0905536399947273
          entropy_coeff: 0.009999999999999998
          kl: 0.0024369474895286833
          policy_loss: -0.012503964164190822
          total_loss: -0.010541941225528716
          vf_explained_var: 0.24119041860103607
          vf_loss: 0.011306150702552663
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,286,7691.67,286000,-3.3533,-2.5,-7.77,328.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-21_21-15-53
  done: false
  episode_len_mean: 327.12
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.339499999999974
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 771
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.0944621390766567
          entropy_coeff: 0.009999999999999998
          kl: 0.01017910994796662
          policy_loss: -0.11358155724075106
          total_loss: -0.11031875726249483
          vf_explained_var: 0.2574424147605896
          vf_loss: 0.010946424667619997
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,287,7728.02,287000,-3.3395,-2.5,-7.77,327.12


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-21_21-16-24
  done: false
  episode_len_mean: 325.75
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.325799999999974
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 775
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.078201158841451
          entropy_coeff: 0.009999999999999998
          kl: 0.006309516664135057
          policy_loss: -0.029136978917651706
          total_loss: -0.027523238129085966
          vf_explained_var: 0.30406567454338074
          vf_loss: 0.010374430111712879
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,288,7758.81,288000,-3.3258,-2.5,-7.77,325.75


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-21_21-17-00
  done: false
  episode_len_mean: 323.62
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.3044999999999747
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 779
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.0401826712820266
          entropy_coeff: 0.009999999999999998
          kl: 0.003203019873986119
          policy_loss: 0.019210163586669497
          total_loss: 0.021973281684848998
          vf_explained_var: 0.13968850672245026
          vf_loss: 0.01213882120533122
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,289,7795.12,289000,-3.3045,-2.5,-7.77,323.62




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-21_21-17-54
  done: false
  episode_len_mean: 321.25
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.280799999999975
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 783
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16018066406250003
          cur_lr: 5.000000000000001e-05
          entropy: 1.0232589324315389
          entropy_coeff: 0.009999999999999998
          kl: 0.020714469460836802
          policy_loss: 0.006007636338472367
          total_loss: 0.00913621327943272
          vf_explained_var: 0.39452460408210754
          vf_loss: 0.010043108866860469
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 29

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,290,7849.07,290000,-3.2808,-2.22,-7.77,321.25


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-21_21-18-30
  done: false
  episode_len_mean: 320.2
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.2702999999999753
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 787
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.0322499129507277
          entropy_coeff: 0.009999999999999998
          kl: 0.01026957247009924
          policy_loss: 0.07285118790136444
          total_loss: 0.07401459001832539
          vf_explained_var: 0.5816954970359802
          vf_loss: 0.009018422541622486
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,291,7884.55,291000,-3.2703,-2.22,-7.77,320.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-21_21-19-04
  done: false
  episode_len_mean: 318.36
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.2518999999999756
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 790
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.0767634524239433
          entropy_coeff: 0.009999999999999998
          kl: 0.022721735017923253
          policy_loss: -0.012645408552553918
          total_loss: -0.015473645884129736
          vf_explained_var: 0.9146797060966492
          vf_loss: 0.0024800252324591082
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,292,7918.97,292000,-3.2519,-2.22,-7.77,318.36


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-21_21-19-39
  done: false
  episode_len_mean: 315.83
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.2265999999999764
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 794
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.176761751042472
          entropy_coeff: 0.009999999999999998
          kl: 0.02066325189655542
          policy_loss: -0.0056845486991935305
          total_loss: -0.0014237641460365718
          vf_explained_var: 0.5528036952018738
          vf_loss: 0.008581230259086524
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,293,7953.3,293000,-3.2266,-2.22,-7.77,315.83


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-21_21-20-16
  done: false
  episode_len_mean: 312.05
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.188799999999978
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 798
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 0.9619937658309936
          entropy_coeff: 0.009999999999999998
          kl: 0.013581750782638045
          policy_loss: -0.019934512343671586
          total_loss: -0.015186323556635115
          vf_explained_var: 0.6518568992614746
          vf_loss: 0.007025699004427426
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,294,7990.16,294000,-3.1888,-2.22,-7.77,312.05


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-21_21-20-45
  done: false
  episode_len_mean: 311.31
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.181399999999978
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 801
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.3927111056115893
          entropy_coeff: 0.009999999999999998
          kl: 0.017168410417261547
          policy_loss: 0.00931897179947959
          total_loss: 0.013405130141311221
          vf_explained_var: 0.5204965472221375
          vf_loss: 0.00873185485187504
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 29500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,295,8019.63,295000,-3.1814,-2.22,-7.77,311.31


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-21_21-21-17
  done: false
  episode_len_mean: 309.36
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.1618999999999784
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 804
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.2508955895900726
          entropy_coeff: 0.009999999999999998
          kl: 0.026882467414564813
          policy_loss: -0.12941215203868017
          total_loss: -0.12159367824594179
          vf_explained_var: 0.7117484211921692
          vf_loss: 0.005794506278147714
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 29

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,296,8052.01,296000,-3.1619,-2.22,-7.77,309.36


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-21_21-21-50
  done: false
  episode_len_mean: 308.09
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.149199999999978
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 4
  episodes_total: 808
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.2948958026038275
          entropy_coeff: 0.009999999999999998
          kl: 0.007797701125865212
          policy_loss: -0.030437209871080187
          total_loss: -0.0347113495071729
          vf_explained_var: 0.8853574395179749
          vf_loss: 0.002351550429335071
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,297,8084.1,297000,-3.1492,-2.22,-7.77,308.09




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-21_21-22-41
  done: false
  episode_len_mean: 305.89
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.127199999999979
  episode_reward_min: -7.769999999999964
  episodes_this_iter: 3
  episodes_total: 811
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.1572086334228515
          entropy_coeff: 0.009999999999999998
          kl: 0.0065841474838862265
          policy_loss: -0.05700188188089265
          total_loss: -0.059004035964608195
          vf_explained_var: 0.5533361434936523
          vf_loss: 0.004230750375427306
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,298,8135.57,298000,-3.1272,-2.22,-7.77,305.89


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-21_21-23-13
  done: false
  episode_len_mean: 302.14
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.050299999999979
  episode_reward_min: -6.459999999999971
  episodes_this_iter: 4
  episodes_total: 815
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.2381215042538114
          entropy_coeff: 0.009999999999999998
          kl: 0.011895364512649111
          policy_loss: 0.0317370298008124
          total_loss: 0.03458746928307745
          vf_explained_var: 0.6327933073043823
          vf_loss: 0.005585525519887192
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,299,8167.86,299000,-3.0503,-2.22,-6.46,302.14


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-21_21-23-45
  done: false
  episode_len_mean: 301.01
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0389999999999793
  episode_reward_min: -6.459999999999971
  episodes_this_iter: 3
  episodes_total: 818
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.363813665178087
          entropy_coeff: 0.009999999999999998
          kl: 0.009560474184163522
          policy_loss: 0.0643673246105512
          total_loss: 0.06233562781578965
          vf_explained_var: 0.7926248908042908
          vf_loss: 0.003853712678473029
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,300,8199.36,300000,-3.039,-2.22,-6.46,301.01


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-21_21-24-14
  done: false
  episode_len_mean: 300.84
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0374999999999788
  episode_reward_min: -6.459999999999971
  episodes_this_iter: 3
  episodes_total: 821
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.6898035128911337
          entropy_coeff: 0.009999999999999998
          kl: 0.013681478136569246
          policy_loss: 0.06834509621063868
          total_loss: 0.06535969566967752
          vf_explained_var: 0.7381706237792969
          vf_loss: 0.002818122254878593
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 3010

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,301,8228.06,301000,-3.0375,-2.22,-6.46,300.84


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-21_21-24-47
  done: false
  episode_len_mean: 298.96
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.98959999999998
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 824
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.2343876361846924
          entropy_coeff: 0.009999999999999998
          kl: 0.006845069893120974
          policy_loss: -0.14365775105026032
          total_loss: -0.14459694359037611
          vf_explained_var: 0.7170701026916504
          vf_loss: 0.00585391553144695
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 30200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,302,8261.49,302000,-2.9896,-2.22,-4.7,298.96


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-21_21-25-18
  done: false
  episode_len_mean: 297.12
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9711999999999796
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 828
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.3194588210847642
          entropy_coeff: 0.009999999999999998
          kl: 0.01898811891064827
          policy_loss: -0.022168080343140495
          total_loss: -0.016928562190797594
          vf_explained_var: 0.8884050250053406
          vf_loss: 0.0030363631182505437
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,303,8292.26,303000,-2.9712,-2.22,-4.7,297.12


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-21_21-25-48
  done: false
  episode_len_mean: 296.34
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9633999999999805
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 831
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.2760806931389703
          entropy_coeff: 0.009999999999999998
          kl: 0.009248785766547604
          policy_loss: 0.06275261259741254
          total_loss: 0.060184645363026194
          vf_explained_var: 0.9279566407203674
          vf_loss: 0.0026928661915007978
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 30

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,304,8322.61,304000,-2.9634,-2.22,-4.7,296.34


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-21_21-26-16
  done: false
  episode_len_mean: 296.81
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.96809999999998
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 834
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.580295745531718
          entropy_coeff: 0.009999999999999998
          kl: 0.012089758593724772
          policy_loss: 0.06429478915201293
          total_loss: 0.06077870196766323
          vf_explained_var: 0.9103429913520813
          vf_loss: 0.0024831107235513627
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,305,8350.21,305000,-2.9681,-2.22,-4.7,296.81


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-21_21-26-47
  done: false
  episode_len_mean: 295.69
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9568999999999805
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 837
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.2403627283043333
          entropy_coeff: 0.009999999999999998
          kl: 0.013449091478628992
          policy_loss: -0.03034251059095065
          total_loss: -0.02939416691660881
          vf_explained_var: 0.953696608543396
          vf_loss: 0.002445902286045667
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,306,8381.54,306000,-2.9569,-2.22,-4.7,295.69




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-21_21-27-33
  done: false
  episode_len_mean: 295.59
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.955899999999981
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 840
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 0.8538995613654454
          entropy_coeff: 0.009999999999999998
          kl: 0.01431179000605047
          policy_loss: -0.01693024800883399
          total_loss: -0.0014539561337894864
          vf_explained_var: 0.8281078934669495
          vf_loss: 0.01240964699536562
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,307,8426.95,307000,-2.9559,-2.22,-4.7,295.59


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-21_21-28-02
  done: false
  episode_len_mean: 296.63
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.97619999999998
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 843
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.0937622043821547
          entropy_coeff: 0.009999999999999998
          kl: 0.009915604890210419
          policy_loss: 0.014823492450846566
          total_loss: 0.023056350234482024
          vf_explained_var: 0.7109197974205017
          vf_loss: 0.011129769853626688
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 3080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,308,8455.84,308000,-2.9762,-2.22,-4.7,296.63


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-21_21-28-32
  done: false
  episode_len_mean: 296.73
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9771999999999803
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 846
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.2249585893419055
          entropy_coeff: 0.009999999999999998
          kl: 0.0071623482110859445
          policy_loss: -0.13328664186928008
          total_loss: -0.13307495137883557
          vf_explained_var: 0.6323071718215942
          vf_loss: 0.0066532234363775285
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,309,8486.38,309000,-2.9772,-2.22,-4.7,296.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-21_21-29-02
  done: false
  episode_len_mean: 294.27
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9525999999999812
  episode_reward_min: -4.6399999999999455
  episodes_this_iter: 4
  episodes_total: 850
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.0895236796802945
          entropy_coeff: 0.009999999999999998
          kl: 0.0031402144478515403
          policy_loss: -0.015089941355917189
          total_loss: -0.013827117946412828
          vf_explained_var: 0.5172232389450073
          vf_loss: 0.009611615502379007
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,310,8515.86,310000,-2.9526,-2.22,-4.64,294.27


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-21_21-29-31
  done: false
  episode_len_mean: 292.35
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.933399999999982
  episode_reward_min: -4.6399999999999455
  episodes_this_iter: 3
  episodes_total: 853
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.4164256082640754
          entropy_coeff: 0.009999999999999998
          kl: 0.010277689355187074
          policy_loss: 0.020749402542908985
          total_loss: 0.01641637103425132
          vf_explained_var: 0.7027444839477539
          vf_loss: 0.005664060769292215
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 31

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,311,8545.43,311000,-2.9334,-2.22,-4.64,292.35


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-21_21-30-00
  done: false
  episode_len_mean: 293.38
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9436999999999807
  episode_reward_min: -4.6399999999999455
  episodes_this_iter: 3
  episodes_total: 856
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.2109958781136407
          entropy_coeff: 0.009999999999999998
          kl: 0.010498441068580415
          policy_loss: 0.060502823462916744
          total_loss: 0.05520292458434899
          vf_explained_var: 0.8882578611373901
          vf_loss: 0.0025533883159773217
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,312,8574.28,312000,-2.9437,-2.22,-4.64,293.38


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-21_21-30-25
  done: false
  episode_len_mean: 295.1
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.960899999999981
  episode_reward_min: -4.6399999999999455
  episodes_this_iter: 2
  episodes_total: 858
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.3819529241985744
          entropy_coeff: 0.009999999999999998
          kl: 0.011733548786856634
          policy_loss: -0.08093480169773101
          total_loss: -0.0845103296968672
          vf_explained_var: 0.7239720821380615
          vf_loss: 0.005486549209389422
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,313,8598.91,313000,-2.9609,-2.22,-4.64,295.1


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-21_21-30-46
  done: false
  episode_len_mean: 298.6
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.99589999999998
  episode_reward_min: -5.069999999999936
  episodes_this_iter: 3
  episodes_total: 861
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.0226263086001077
          entropy_coeff: 0.009999999999999998
          kl: 0.010874095570927316
          policy_loss: 0.02948655237754186
          total_loss: 0.02957114941544003
          vf_explained_var: 0.5070223212242126
          vf_loss: 0.005901878768215991
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,314,8620.42,314000,-2.9959,-2.22,-5.07,298.6


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-21_21-31-14
  done: false
  episode_len_mean: 300.27
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0125999999999795
  episode_reward_min: -5.069999999999936
  episodes_this_iter: 3
  episodes_total: 864
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.2012132710880703
          entropy_coeff: 0.009999999999999998
          kl: 0.007939605981435468
          policy_loss: 0.00013562109735276964
          total_loss: 0.0008402135637071398
          vf_explained_var: 0.24707698822021484
          vf_loss: 0.009497555955830548
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,315,8647.98,315000,-3.0126,-2.22,-5.07,300.27


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-21_21-31-40
  done: false
  episode_len_mean: 302.13
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.031199999999979
  episode_reward_min: -5.069999999999936
  episodes_this_iter: 2
  episodes_total: 866
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.3057360728581746
          entropy_coeff: 0.009999999999999998
          kl: 0.008013010160482163
          policy_loss: -0.1228052039941152
          total_loss: -0.12313801878028446
          vf_explained_var: 0.30643606185913086
          vf_loss: 0.009475615921999431
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 31

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,316,8674.54,316000,-3.0312,-2.22,-5.07,302.13


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-21_21-32-06
  done: false
  episode_len_mean: 304.89
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.058799999999979
  episode_reward_min: -5.069999999999936
  episodes_this_iter: 3
  episodes_total: 869
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.2229444622993468
          entropy_coeff: 0.009999999999999998
          kl: 0.011451663469699665
          policy_loss: -0.07591082296437687
          total_loss: -0.07412432341112031
          vf_explained_var: 0.6017182469367981
          vf_loss: 0.0093727853278526
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 3170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,317,8699.73,317000,-3.0588,-2.22,-5.07,304.89




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-21_21-32-50
  done: false
  episode_len_mean: 308.51
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.104699999999978
  episode_reward_min: -5.269999999999944
  episodes_this_iter: 3
  episodes_total: 872
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.2920142610867817
          entropy_coeff: 0.009999999999999998
          kl: 0.0187914397284458
          policy_loss: 0.02753941458132532
          total_loss: 0.03388769432074494
          vf_explained_var: 0.5501846671104431
          vf_loss: 0.011649296295622157
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,318,8744.55,318000,-3.1047,-2.22,-5.27,308.51


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-21_21-33-16
  done: false
  episode_len_mean: 310.69
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.1264999999999774
  episode_reward_min: -5.269999999999944
  episodes_this_iter: 2
  episodes_total: 874
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.409136970837911
          entropy_coeff: 0.009999999999999998
          kl: 0.014998990265704344
          policy_loss: -0.034872738189167446
          total_loss: -0.03637973103258345
          vf_explained_var: 0.32101887464523315
          vf_loss: 0.006502917970323728
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,319,8769.82,319000,-3.1265,-2.22,-5.27,310.69


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-21_21-33-42
  done: false
  episode_len_mean: 314.24
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.1619999999999764
  episode_reward_min: -5.269999999999944
  episodes_this_iter: 3
  episodes_total: 877
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.3642896321084765
          entropy_coeff: 0.009999999999999998
          kl: 0.009900075483426068
          policy_loss: -0.03150347053176827
          total_loss: -0.032538952057560286
          vf_explained_var: 0.6264523267745972
          vf_loss: 0.008593357336293492
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,320,8796.09,320000,-3.162,-2.22,-5.27,314.24


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-21_21-34-10
  done: false
  episode_len_mean: 317.01
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.189699999999976
  episode_reward_min: -5.269999999999944
  episodes_this_iter: 3
  episodes_total: 880
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.0946609093083275
          entropy_coeff: 0.009999999999999998
          kl: 0.009543697316785248
          policy_loss: -0.00766311999824312
          total_loss: -0.009105650087197622
          vf_explained_var: 0.48395320773124695
          vf_loss: 0.005634521156187273
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,321,8823.86,321000,-3.1897,-2.34,-5.27,317.01


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-21_21-34-33
  done: false
  episode_len_mean: 319.7
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.2165999999999757
  episode_reward_min: -5.269999999999944
  episodes_this_iter: 2
  episodes_total: 882
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.4330005367596945
          entropy_coeff: 0.009999999999999998
          kl: 0.017210444181611547
          policy_loss: -0.11327589452266693
          total_loss: -0.11398755841785008
          vf_explained_var: 0.696868360042572
          vf_loss: 0.006640241260174662
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 3220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,322,8846.67,322000,-3.2166,-2.34,-5.27,319.7


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-21_21-34-52
  done: false
  episode_len_mean: 325.6
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.2755999999999745
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 885
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.4695157514678108
          entropy_coeff: 0.009999999999999998
          kl: 0.013375091775530161
          policy_loss: -0.018913811859157352
          total_loss: -0.021783715652094946
          vf_explained_var: 0.734582245349884
          vf_loss: 0.00640222601052503
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,323,8865.77,323000,-3.2756,-2.34,-5.5,325.6


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-21_21-35-15
  done: false
  episode_len_mean: 327.53
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.2948999999999744
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 2
  episodes_total: 887
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.5257480727301704
          entropy_coeff: 0.009999999999999998
          kl: 0.011539214842330224
          policy_loss: -0.09241306318177117
          total_loss: -0.09356405122412575
          vf_explained_var: 0.6208950877189636
          vf_loss: 0.00942783248093393
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,324,8889.27,324000,-3.2949,-2.34,-5.5,327.53


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-21_21-35-41
  done: false
  episode_len_mean: 330.85
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.328099999999974
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 890
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.436710602707333
          entropy_coeff: 0.009999999999999998
          kl: 0.008708640516126357
          policy_loss: 0.02125318327711688
          total_loss: 0.019312538703282676
          vf_explained_var: 0.5520676374435425
          vf_loss: 0.008895477649962737
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 32500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,325,8915,325000,-3.3281,-2.34,-5.5,330.85


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-21_21-36-08
  done: false
  episode_len_mean: 332.52
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.3546999999999736
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 2
  episodes_total: 892
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.429672156439887
          entropy_coeff: 0.009999999999999998
          kl: 0.019412547492332895
          policy_loss: -0.09342139926221636
          total_loss: -0.09149770306216345
          vf_explained_var: 0.7837722897529602
          vf_loss: 0.008349451060510344
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,326,8941.97,326000,-3.3547,-2.34,-5.5,332.52


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-21_21-36-38
  done: false
  episode_len_mean: 335.78
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.387299999999972
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 4
  episodes_total: 896
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.2342731608284845
          entropy_coeff: 0.009999999999999998
          kl: 0.010284074044148649
          policy_loss: -0.04817491604222192
          total_loss: -0.04569757166835997
          vf_explained_var: 0.5050758719444275
          vf_loss: 0.01065032106772479
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 3270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,327,8971.78,327000,-3.3873,-2.34,-5.5,335.78


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-21_21-37-07
  done: false
  episode_len_mean: 337.32
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.4026999999999714
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 899
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.2402507325013479
          entropy_coeff: 0.009999999999999998
          kl: 0.008875823534445972
          policy_loss: 0.015279563764731089
          total_loss: 0.015336665014425913
          vf_explained_var: 0.5774902105331421
          vf_loss: 0.008860842359717935
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 32

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,328,9001.17,328000,-3.4027,-2.34,-5.5,337.32




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-21_21-37-55
  done: false
  episode_len_mean: 337.23
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.4017999999999717
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 902
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.5080928405125935
          entropy_coeff: 0.009999999999999998
          kl: 0.010861433720001814
          policy_loss: 0.05220806681447559
          total_loss: 0.046377605862087674
          vf_explained_var: 0.6988080143928528
          vf_loss: 0.004846617849802392
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,329,9048.65,329000,-3.4018,-2.34,-5.5,337.23


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-21_21-38-22
  done: false
  episode_len_mean: 339.36
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.423099999999971
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 2
  episodes_total: 904
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.798514511850145
          entropy_coeff: 0.009999999999999998
          kl: 0.013334595929105022
          policy_loss: -0.11157961885134379
          total_loss: -0.11340343985292646
          vf_explained_var: -0.05144280940294266
          vf_loss: 0.01075471485964954
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,330,9075.42,330000,-3.4231,-2.34,-5.5,339.36


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-21_21-38-45
  done: false
  episode_len_mean: 341.57
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.445199999999971
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 907
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.983708205487993
          entropy_coeff: 0.009999999999999998
          kl: 0.014230805910726039
          policy_loss: -0.028458768874406813
          total_loss: -0.03513456703060203
          vf_explained_var: 0.4812021851539612
          vf_loss: 0.007391295544544442
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,331,9098.96,331000,-3.4452,-2.34,-5.5,341.57


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-21_21-39-14
  done: false
  episode_len_mean: 342.79
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.4573999999999705
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 910
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.3914832578765022
          entropy_coeff: 0.009999999999999998
          kl: 0.014686410065614395
          policy_loss: 0.014687371088398828
          total_loss: 0.018723080886734855
          vf_explained_var: 0.524298369884491
          vf_loss: 0.011995828570798039
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,332,9127.36,332000,-3.4574,-2.34,-5.5,342.79


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-21_21-39-44
  done: false
  episode_len_mean: 343.37
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.4631999999999703
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 913
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.0088512268331316
          entropy_coeff: 0.009999999999999998
          kl: 0.009193398411223146
          policy_loss: -0.12648266876737277
          total_loss: -0.12469499599602488
          vf_explained_var: 0.7472435235977173
          vf_loss: 0.0081486546408592
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 3330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,333,9157.7,333000,-3.4632,-2.34,-5.5,343.37


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-21_21-40-18
  done: false
  episode_len_mean: 343.01
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.4595999999999707
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 4
  episodes_total: 917
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.0525923483901554
          entropy_coeff: 0.009999999999999998
          kl: 0.01599063486273514
          policy_loss: -0.05769971116549439
          total_loss: -0.05444010976288054
          vf_explained_var: 0.4986712634563446
          vf_loss: 0.007302005534681181
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,334,9191.93,334000,-3.4596,-2.34,-5.5,343.01


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-21_21-40-52
  done: false
  episode_len_mean: 340.81
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.4375999999999713
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 4
  episodes_total: 921
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.1293354372183482
          entropy_coeff: 0.009999999999999998
          kl: 0.010361592473793103
          policy_loss: -0.018448143038484785
          total_loss: -0.014210426145129733
          vf_explained_var: 0.2097989022731781
          vf_loss: 0.011329886249990926
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,335,9225.25,335000,-3.4376,-2.34,-5.5,340.81


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-21_21-41-23
  done: false
  episode_len_mean: 341.09
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.440399999999971
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 924
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.40545730590820295
          cur_lr: 5.000000000000001e-05
          entropy: 1.194424174229304
          entropy_coeff: 0.009999999999999998
          kl: 0.027247279554757142
          policy_loss: 0.03731817644503382
          total_loss: 0.043570098363690905
          vf_explained_var: 0.6174387335777283
          vf_loss: 0.007148554544740667
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 33600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,336,9257.16,336000,-3.4404,-2.34,-5.5,341.09


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-21_21-41-56
  done: false
  episode_len_mean: 340.87
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.438199999999971
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 927
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.1649912814299266
          entropy_coeff: 0.009999999999999998
          kl: 0.006568423031185135
          policy_loss: -0.09400231788555781
          total_loss: -0.09186833682987425
          vf_explained_var: 0.3990946412086487
          vf_loss: 0.009789067279133533
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 3370

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,337,9289.23,337000,-3.4382,-2.34,-5.5,340.87




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-21_21-42-43
  done: false
  episode_len_mean: 341.32
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.44269999999997
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 4
  episodes_total: 931
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 1.0984430611133575
          entropy_coeff: 0.009999999999999998
          kl: 0.01030487636126046
          policy_loss: -0.07684328166974916
          total_loss: -0.0721837941557169
          vf_explained_var: 0.583359956741333
          vf_loss: 0.009376640007313754
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,338,9336.62,338000,-3.4427,-2.34,-5.5,341.32


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-21_21-43-17
  done: false
  episode_len_mean: 338.75
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.4169999999999714
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 934
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6081859588623049
          cur_lr: 5.000000000000001e-05
          entropy: 0.8610538250870174
          entropy_coeff: 0.009999999999999998
          kl: 0.0046991680183574485
          policy_loss: -0.10261861152119106
          total_loss: -0.09744293085402912
          vf_explained_var: 0.34421423077583313
          vf_loss: 0.010928252913678686
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,339,9370.86,339000,-3.417,-2.34,-5.5,338.75


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-21_21-43-48
  done: false
  episode_len_mean: 338.17
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.4111999999999716
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 4
  episodes_total: 938
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 0.9537416120370229
          entropy_coeff: 0.009999999999999998
          kl: 0.013219693581600547
          policy_loss: -0.12248225005136595
          total_loss: -0.11670995520220863
          vf_explained_var: 0.6230820417404175
          vf_loss: 0.011289693576852895
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 34

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,340,9401.47,340000,-3.4112,-2.34,-5.5,338.17


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-21_21-44-21
  done: false
  episode_len_mean: 335.17
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3712999999999713
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 941
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 0.8557697799470689
          entropy_coeff: 0.009999999999999998
          kl: 0.007480162698881701
          policy_loss: -0.15405980779065026
          total_loss: -0.14852215291725265
          vf_explained_var: 0.38827595114707947
          vf_loss: 0.011820688973077469
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,341,9434.87,341000,-3.3713,-2.38,-5.5,335.17


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-21_21-44-53
  done: false
  episode_len_mean: 334.89
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3684999999999725
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 4
  episodes_total: 945
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.1228176249398125
          entropy_coeff: 0.009999999999999998
          kl: 0.013672689686084519
          policy_loss: 0.07109919295956692
          total_loss: 0.07318654412196743
          vf_explained_var: 0.7643261551856995
          vf_loss: 0.009157758263043232
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,342,9466.43,342000,-3.3685,-2.38,-5.5,334.89


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-21_21-45-28
  done: false
  episode_len_mean: 333.58
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.355399999999973
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 948
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 0.9255236042870416
          entropy_coeff: 0.009999999999999998
          kl: 0.010275567179540385
          policy_loss: -0.09963979456159804
          total_loss: -0.09431957370705074
          vf_explained_var: 0.3437243700027466
          vf_loss: 0.011450726721280564
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 34

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,343,9501.45,343000,-3.3554,-2.38,-5.5,333.58


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-21_21-45-54
  done: false
  episode_len_mean: 334.9
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3685999999999727
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 951
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30409297943115243
          cur_lr: 5.000000000000001e-05
          entropy: 1.4651929828855725
          entropy_coeff: 0.009999999999999998
          kl: 0.025614157063205942
          policy_loss: -0.044789055776264936
          total_loss: -0.0469636963473426
          vf_explained_var: 0.7472261190414429
          vf_loss: 0.004688202822580934
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 34

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,344,9527.27,344000,-3.3686,-2.38,-5.5,334.9


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-21_21-46-25
  done: false
  episode_len_mean: 333.99
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3594999999999726
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 4
  episodes_total: 955
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.0617035574383207
          entropy_coeff: 0.009999999999999998
          kl: 0.016386414635920863
          policy_loss: -0.039530085668795636
          total_loss: -0.032094215653422804
          vf_explained_var: 0.4099594056606293
          vf_loss: 0.010578416487098568
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,345,9558.26,345000,-3.3595,-2.38,-5.5,333.99


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-21_21-46-49
  done: false
  episode_len_mean: 335.62
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.375799999999972
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 2
  episodes_total: 957
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.7475789533721076
          entropy_coeff: 0.009999999999999998
          kl: 0.01745595190900049
          policy_loss: 0.07261300616794163
          total_loss: 0.06791443477074305
          vf_explained_var: -0.16059976816177368
          vf_loss: 0.004814864795965453
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,346,9582.27,346000,-3.3758,-2.38,-5.5,335.62




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-21_21-47-38
  done: false
  episode_len_mean: 332.91
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.348699999999973
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 960
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.047866079542372
          entropy_coeff: 0.009999999999999998
          kl: 0.012751078207787082
          policy_loss: -0.15923282214336926
          total_loss: -0.15421761100490888
          vf_explained_var: 0.4146115481853485
          vf_loss: 0.009677601083078318
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,347,9631.65,347000,-3.3487,-2.38,-5.5,332.91


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-21_21-48-04
  done: false
  episode_len_mean: 330.76
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3271999999999737
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 963
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.1476800342400868
          entropy_coeff: 0.009999999999999998
          kl: 0.014543908172338913
          policy_loss: -0.13320568510227734
          total_loss: -0.1264062395526303
          vf_explained_var: 0.23501046001911163
          vf_loss: 0.011642196458867854
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,348,9657.14,348000,-3.3272,-2.38,-5.5,330.76


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-21_21-48-25
  done: false
  episode_len_mean: 332.47
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.344299999999973
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 2
  episodes_total: 965
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.771375717057122
          entropy_coeff: 0.009999999999999998
          kl: 0.013865855428334722
          policy_loss: -0.1332534952296151
          total_loss: -0.13684205752280024
          vf_explained_var: 0.16048772633075714
          vf_loss: 0.007800426709258722
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,349,9678.77,349000,-3.3443,-2.38,-5.5,332.47


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-21_21-48-57
  done: false
  episode_len_mean: 331.86
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3381999999999734
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 4
  episodes_total: 969
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.2218883792559305
          entropy_coeff: 0.009999999999999998
          kl: 0.013127618049308154
          policy_loss: -0.043039207119080755
          total_loss: -0.04088989744583766
          vf_explained_var: 0.578287661075592
          vf_loss: 0.008380167005169723
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,350,9710.24,350000,-3.3382,-2.38,-5.5,331.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-21_21-49-24
  done: false
  episode_len_mean: 330.98
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3196999999999743
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 972
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.3623899532688988
          entropy_coeff: 0.009999999999999998
          kl: 0.017341226248647836
          policy_loss: 0.002655214940508207
          total_loss: 0.003195451572537422
          vf_explained_var: 0.4543473422527313
          vf_loss: 0.006254116833830873
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,351,9737.63,351000,-3.3197,-2.38,-5.5,330.98


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-21_21-49-50
  done: false
  episode_len_mean: 330.66
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.316499999999974
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 2
  episodes_total: 974
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.4733549310101404
          entropy_coeff: 0.009999999999999998
          kl: 0.010941470831471081
          policy_loss: -0.09887265976932313
          total_loss: -0.10077911507752206
          vf_explained_var: 0.0012162593193352222
          vf_loss: 0.007836260126593213
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,352,9763.45,352000,-3.3165,-2.38,-5.5,330.66


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-21_21-50-12
  done: false
  episode_len_mean: 331.83
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3281999999999727
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 977
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.8361553880903456
          entropy_coeff: 0.009999999999999998
          kl: 0.012903140541214499
          policy_loss: -0.0059027277761035495
          total_loss: -0.008488632159100638
          vf_explained_var: -0.08298470824956894
          vf_loss: 0.009890016702572918
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,353,9785.72,353000,-3.3282,-2.38,-5.5,331.83


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-21_21-50-42
  done: false
  episode_len_mean: 331.56
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.325499999999973
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 980
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.350160410669115
          entropy_coeff: 0.009999999999999998
          kl: 0.00973897077089517
          policy_loss: 0.027253664367728764
          total_loss: 0.02922171817885505
          vf_explained_var: 0.16518177092075348
          vf_loss: 0.011027326414154635
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 3540

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,354,9814.79,354000,-3.3255,-2.38,-5.5,331.56


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-21_21-51-10
  done: false
  episode_len_mean: 329.06
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3004999999999733
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 983
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.161882722377777
          entropy_coeff: 0.009999999999999998
          kl: 0.017141569132248808
          policy_loss: 0.043041669494575927
          total_loss: 0.04787308126688004
          vf_explained_var: 0.29985079169273376
          vf_loss: 0.008631292989270555
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 35

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,355,9843.47,355000,-3.3005,-2.38,-5.5,329.06


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-21_21-51-41
  done: false
  episode_len_mean: 325.2
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.2618999999999745
  episode_reward_min: -5.199999999999934
  episodes_this_iter: 3
  episodes_total: 986
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.011851237879859
          entropy_coeff: 0.009999999999999998
          kl: 0.006888389333127836
          policy_loss: 0.02797636083430714
          total_loss: 0.02814661951528655
          vf_explained_var: 0.5239356160163879
          vf_loss: 0.007146703712512843
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 35600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,356,9873.75,356000,-3.2619,-2.38,-5.2,325.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-21_21-52-14
  done: false
  episode_len_mean: 322.96
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.2394999999999743
  episode_reward_min: -5.199999999999934
  episodes_this_iter: 3
  episodes_total: 989
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 1.0398213764031727
          entropy_coeff: 0.009999999999999998
          kl: 0.011271496498671495
          policy_loss: -0.024538877937528823
          total_loss: -0.02061371256907781
          vf_explained_var: 0.6520624756813049
          vf_loss: 0.009182006968573356
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,357,9906.81,357000,-3.2395,-2.38,-5.2,322.96




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-21_21-53-03
  done: false
  episode_len_mean: 317.71
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.177099999999976
  episode_reward_min: -5.199999999999934
  episodes_this_iter: 4
  episodes_total: 993
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45613946914672854
          cur_lr: 5.000000000000001e-05
          entropy: 0.6374208622508579
          entropy_coeff: 0.009999999999999998
          kl: 0.003593342920429047
          policy_loss: -0.01382885438700517
          total_loss: -0.0036408468253082697
          vf_explained_var: 0.15759685635566711
          vf_loss: 0.014923147039694919
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,358,9956.58,358000,-3.1771,-2.32,-5.2,317.71


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-21_21-53-30
  done: false
  episode_len_mean: 319.71
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.1970999999999754
  episode_reward_min: -5.199999999999934
  episodes_this_iter: 3
  episodes_total: 996
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22806973457336427
          cur_lr: 5.000000000000001e-05
          entropy: 1.758842294745975
          entropy_coeff: 0.009999999999999998
          kl: 0.04794174996805549
          policy_loss: 0.051814133591122094
          total_loss: 0.051006108687983616
          vf_explained_var: 0.31700146198272705
          vf_loss: 0.005846337783926477
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 35

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,359,9983.26,359000,-3.1971,-2.32,-5.2,319.71


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-21_21-53-55
  done: false
  episode_len_mean: 321.38
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.2137999999999756
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 998
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3421046018600464
          cur_lr: 5.000000000000001e-05
          entropy: 1.5319822967052459
          entropy_coeff: 0.009999999999999998
          kl: 0.02883338304693643
          policy_loss: -0.05878048340479533
          total_loss: -0.05844889316293928
          vf_explained_var: -0.008597508072853088
          vf_loss: 0.005787375852297474
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,360,10008.4,360000,-3.2138,-2.32,-5.36,321.38


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-21_21-54-17
  done: false
  episode_len_mean: 324.37
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.243699999999975
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1000
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.7353051874372694
          entropy_coeff: 0.009999999999999998
          kl: 0.01802390665044046
          policy_loss: -0.12728707277112536
          total_loss: -0.1313728561831845
          vf_explained_var: 0.8474992513656616
          vf_loss: 0.004018171116088828
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 36100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,361,10030.5,361000,-3.2437,-2.32,-5.36,324.37


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-21_21-54-41
  done: false
  episode_len_mean: 326.87
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.2686999999999746
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1003
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.789025154378679
          entropy_coeff: 0.009999999999999998
          kl: 0.012841800778680421
          policy_loss: 0.02944375549753507
          total_loss: 0.021978873759508133
          vf_explained_var: 0.5245177149772644
          vf_loss: 0.0038355132202721304
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,362,10054,362000,-3.2687,-2.32,-5.36,326.87


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-21_21-55-05
  done: false
  episode_len_mean: 328.16
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.2815999999999743
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1005
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.9505633605851067
          entropy_coeff: 0.009999999999999998
          kl: 0.012025919190300351
          policy_loss: 0.09254027207692464
          total_loss: 0.08326670941379335
          vf_explained_var: 0.21532867848873138
          vf_loss: 0.004060889455851995
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,363,10077.8,363000,-3.2816,-2.32,-5.36,328.16


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-21_21-55-27
  done: false
  episode_len_mean: 328.92
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.2891999999999735
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1007
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.677135243680742
          entropy_coeff: 0.009999999999999998
          kl: 0.019719988514965082
          policy_loss: -0.1331629925303989
          total_loss: -0.13009208531843292
          vf_explained_var: -0.1095890998840332
          vf_loss: 0.009722811620870036
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,364,10099.9,364000,-3.2892,-2.32,-5.36,328.92


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-21_21-55-50
  done: false
  episode_len_mean: 330.42
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.3041999999999736
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1009
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.8332008282343546
          entropy_coeff: 0.009999999999999998
          kl: 0.013227931024691546
          policy_loss: -0.11760586533281538
          total_loss: -0.1259878362218539
          vf_explained_var: 0.8281993269920349
          vf_loss: 0.0031620332764254674
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 36

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,365,10122.7,365000,-3.3042,-2.32,-5.36,330.42


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-21_21-56-11
  done: false
  episode_len_mean: 335.97
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.3596999999999726
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1012
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.7909862690501743
          entropy_coeff: 0.009999999999999998
          kl: 0.01110204158926336
          policy_loss: -0.10747741783658664
          total_loss: -0.11217475607991219
          vf_explained_var: 0.5785658359527588
          vf_loss: 0.007515434462887546
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,366,10144.4,366000,-3.3597,-2.32,-5.36,335.97


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-21_21-56-37
  done: false
  episode_len_mean: 336.86
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.3685999999999723
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1014
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 1.8634723875257704
          entropy_coeff: 0.009999999999999998
          kl: 0.011972779468555473
          policy_loss: -0.08696017033523984
          total_loss: -0.09091276534729534
          vf_explained_var: 0.046721745282411575
          vf_loss: 0.008538212290214788
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,367,10169.8,367000,-3.3686,-2.32,-5.36,336.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-21_21-57-07
  done: false
  episode_len_mean: 338.53
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.385299999999972
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 4
  episodes_total: 1018
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5131569027900698
          cur_lr: 5.000000000000001e-05
          entropy: 0.9617548644542694
          entropy_coeff: 0.009999999999999998
          kl: 0.004198214123569106
          policy_loss: 0.014708476430839963
          total_loss: 0.020844938192102643
          vf_explained_var: 0.38654497265815735
          vf_loss: 0.013599665406056576
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 36

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,368,10200,368000,-3.3853,-2.32,-5.36,338.53




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-21_21-57-45
  done: false
  episode_len_mean: 341.27
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.4126999999999708
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1020
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2565784513950349
          cur_lr: 5.000000000000001e-05
          entropy: 2.038007416990068
          entropy_coeff: 0.009999999999999998
          kl: 0.028142136680134003
          policy_loss: -0.007060736500554615
          total_loss: -0.01635024468931887
          vf_explained_var: 0.7999665141105652
          vf_loss: 0.003869898792537343
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 36

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,369,10238.2,369000,-3.4127,-2.32,-5.36,341.27


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-21_21-58-10
  done: false
  episode_len_mean: 343.99
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.43989999999997
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1022
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8311692039171854
          entropy_coeff: 0.009999999999999998
          kl: 0.012947255092100641
          policy_loss: -0.06448501886592971
          total_loss: -0.06849954790539212
          vf_explained_var: 0.4730869233608246
          vf_loss: 0.009314185522574311
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 3700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,370,10263.2,370000,-3.4399,-2.32,-5.36,343.99


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-21_21-58-35
  done: false
  episode_len_mean: 347.67
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.4766999999999695
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1025
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.6092768086327447
          entropy_coeff: 0.009999999999999998
          kl: 0.011068262007375217
          policy_loss: 0.07396415852838092
          total_loss: 0.07248595216208034
          vf_explained_var: 0.02868177555501461
          vf_loss: 0.010354747475745777
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,371,10287.5,371000,-3.4767,-2.32,-5.36,347.67


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-21_21-59-02
  done: false
  episode_len_mean: 348.85
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.488499999999969
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1028
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.5740687555736965
          entropy_coeff: 0.009999999999999998
          kl: 0.009836129940675854
          policy_loss: 0.08676622187097867
          total_loss: 0.08227472574346595
          vf_explained_var: 0.6724661588668823
          vf_loss: 0.00746358080391979
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,372,10315.1,372000,-3.4885,-2.32,-5.36,348.85


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-21_21-59-31
  done: false
  episode_len_mean: 350.35
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.5034999999999696
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1031
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7095542828241983
          entropy_coeff: 0.009999999999999998
          kl: 0.012819360034454164
          policy_loss: 0.034422899782657626
          total_loss: 0.02986902048190435
          vf_explained_var: 0.6594528555870056
          vf_loss: 0.007607906410056684
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,373,10343.6,373000,-3.5035,-2.32,-5.36,350.35


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-21_21-59-55
  done: false
  episode_len_mean: 353.26
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.5325999999999693
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1033
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.8771362278196546
          entropy_coeff: 0.009999999999999998
          kl: 0.011820098370184015
          policy_loss: -0.05915937076012293
          total_loss: -0.06470738483799829
          vf_explained_var: 0.6784968972206116
          vf_loss: 0.008674168882943275
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 37

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,374,10367.5,374000,-3.5326,-2.32,-5.36,353.26


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-21_22-00-21
  done: false
  episode_len_mean: 356.92
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.569199999999968
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1036
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.929867864979638
          entropy_coeff: 0.009999999999999998
          kl: 0.012369130159260472
          policy_loss: 0.0397644837697347
          total_loss: 0.033049558848142625
          vf_explained_var: 0.5478871464729309
          vf_loss: 0.00782327333258258
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,375,10393.4,375000,-3.5692,-2.32,-5.36,356.92


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-21_22-00-47
  done: false
  episode_len_mean: 357.89
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.578899999999967
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1038
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.6077462156613669
          entropy_coeff: 0.009999999999999998
          kl: 0.018115235309962764
          policy_loss: -0.07655779756605625
          total_loss: -0.07806090033716626
          vf_explained_var: 0.7099959254264832
          vf_loss: 0.007602388955031832
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,376,10420,376000,-3.5789,-2.32,-5.36,357.89


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-21_22-01-12
  done: false
  episode_len_mean: 362.2
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.621999999999966
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1041
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3848676770925521
          cur_lr: 5.000000000000001e-05
          entropy: 1.7913573768403794
          entropy_coeff: 0.009999999999999998
          kl: 0.02444350347084817
          policy_loss: -0.05725339021947649
          total_loss: -0.054696999821397994
          vf_explained_var: 0.5283967852592468
          vf_loss: 0.011062446962265918
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 3770

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,377,10445.2,377000,-3.622,-2.32,-5.36,362.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-21_22-01-32
  done: false
  episode_len_mean: 365.5
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.654999999999966
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1043
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.949320826265547
          entropy_coeff: 0.009999999999999998
          kl: 0.012143018773627043
          policy_loss: 0.09334961589839723
          total_loss: 0.08249318185779783
          vf_explained_var: 0.039603959769010544
          vf_loss: 0.0016265882996473616
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 3780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,378,10465,378000,-3.655,-2.32,-5.36,365.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-21_22-02-00
  done: false
  episode_len_mean: 367.02
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.6701999999999657
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1046
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.621554426352183
          entropy_coeff: 0.009999999999999998
          kl: 0.012747177627307633
          policy_loss: 0.02003323518567615
          total_loss: 0.014426064325703515
          vf_explained_var: 0.8454005718231201
          vf_loss: 0.003249409624064962
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 3790

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,379,10492.5,379000,-3.6702,-2.32,-5.36,367.02


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-21_22-02-23
  done: false
  episode_len_mean: 370.48
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.704799999999965
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1048
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.8552795052528381
          entropy_coeff: 0.009999999999999998
          kl: 0.012630466313919264
          policy_loss: -0.10213351382149591
          total_loss: -0.10257253862089581
          vf_explained_var: -0.06017925962805748
          vf_loss: 0.010822186266563626
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,380,10515.3,380000,-3.7048,-2.32,-5.36,370.48




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-21_22-03-02
  done: false
  episode_len_mean: 373.88
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.738799999999965
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1050
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.6712515619066026
          entropy_coeff: 0.009999999999999998
          kl: 0.01003045331401727
          policy_loss: -0.09730363504754172
          total_loss: -0.09493458304140302
          vf_explained_var: -0.09139148890972137
          vf_loss: 0.013290979322563443
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,381,10554.3,381000,-3.7388,-2.32,-5.36,373.88


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-21_22-03-28
  done: false
  episode_len_mean: 374.93
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.7492999999999643
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1052
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.6363167193200854
          entropy_coeff: 0.009999999999999998
          kl: 0.010197643857074942
          policy_loss: -0.08649659852186839
          total_loss: -0.08597833630111483
          vf_explained_var: 0.40708300471305847
          vf_loss: 0.010994315730770015
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,382,10580.2,382000,-3.7493,-2.32,-5.36,374.93


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-21_22-03-53
  done: false
  episode_len_mean: 378.96
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.7895999999999628
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1055
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.6594468818770514
          entropy_coeff: 0.009999999999999998
          kl: 0.011685769907709068
          policy_loss: -0.03559779359234704
          total_loss: -0.03367801043722365
          vf_explained_var: -0.02389957383275032
          vf_loss: 0.0117680392616118
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,383,10605.1,383000,-3.7896,-2.32,-5.36,378.96


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-21_22-04-17
  done: false
  episode_len_mean: 378.06
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.7805999999999638
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1057
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.4860945463180542
          entropy_coeff: 0.009999999999999998
          kl: 0.011010705778383133
          policy_loss: -0.13094610141383278
          total_loss: -0.1321941163804796
          vf_explained_var: 0.23250643908977509
          vf_loss: 0.007256427986430935
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,384,10629.5,384000,-3.7806,-2.32,-5.36,378.06


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-21_22-04-39
  done: false
  episode_len_mean: 382.86
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.8285999999999625
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1059
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.468342161178589
          entropy_coeff: 0.009999999999999998
          kl: 0.006869423040476224
          policy_loss: -0.12043556206756167
          total_loss: -0.12321148051155938
          vf_explained_var: -0.061335258185863495
          vf_loss: 0.007941774864674598
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,385,10651.4,385000,-3.8286,-2.32,-5.36,382.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-21_22-05-02
  done: false
  episode_len_mean: 386.31
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.8630999999999616
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1061
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.3435692164633009
          entropy_coeff: 0.009999999999999998
          kl: 0.009354767362464243
          policy_loss: -0.239135159076088
          total_loss: -0.23569788922452264
          vf_explained_var: -0.15688303112983704
          vf_loss: 0.011472440076371033
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,386,10675,386000,-3.8631,-2.32,-5.36,386.31


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-21_22-05-27
  done: false
  episode_len_mean: 386.65
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.8664999999999616
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1064
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.5553367959128486
          entropy_coeff: 0.009999999999999998
          kl: 0.01515815708834512
          policy_loss: -0.049435293757253224
          total_loss: -0.048533051047060224
          vf_explained_var: 0.7703943848609924
          vf_loss: 0.007704782537702057
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,387,10699.2,387000,-3.8665,-2.32,-5.36,386.65


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-21_22-05-50
  done: false
  episode_len_mean: 387.58
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.8757999999999617
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1066
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5773015156388283
          cur_lr: 5.000000000000001e-05
          entropy: 1.4398066679636636
          entropy_coeff: 0.009999999999999998
          kl: 0.00353152709143632
          policy_loss: 0.054120354188813106
          total_loss: 0.0487799185845587
          vf_explained_var: 0.05956033989787102
          vf_loss: 0.007018875993607152
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 3880

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,388,10722.1,388000,-3.8758,-2.32,-5.36,387.58


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-21_22-06-11
  done: false
  episode_len_mean: 390.58
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.905799999999961
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1068
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.3859256426493327
          entropy_coeff: 0.009999999999999998
          kl: 0.015130186540230758
          policy_loss: -0.0911030489537451
          total_loss: -0.09164729615052541
          vf_explained_var: 0.29312267899513245
          vf_loss: 0.008947667681301634
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,389,10743.9,389000,-3.9058,-2.32,-5.36,390.58


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-21_22-06-36
  done: false
  episode_len_mean: 393.42
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.934199999999961
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1070
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.5792150484191048
          entropy_coeff: 0.009999999999999998
          kl: 0.013628601795348376
          policy_loss: -0.09877026933762763
          total_loss: -0.10183658541904556
          vf_explained_var: 0.5471389889717102
          vf_loss: 0.008791923819161537
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,390,10768,390000,-3.9342,-2.32,-5.36,393.42


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-21_22-07-02
  done: false
  episode_len_mean: 395.47
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.9546999999999604
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1073
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.4270703064070807
          entropy_coeff: 0.009999999999999998
          kl: 0.014043321441481875
          policy_loss: -0.07405723796950446
          total_loss: -0.07356790279348692
          vf_explained_var: 0.3984074890613556
          vf_loss: 0.01070642231627264
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,391,10794.4,391000,-3.9547,-2.32,-5.36,395.47


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-21_22-07-29
  done: false
  episode_len_mean: 392.47
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.92469999999996
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1076
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.7226728399594624
          entropy_coeff: 0.009999999999999998
          kl: 0.018748460727526232
          policy_loss: 0.009821708169248369
          total_loss: 0.0038175636695490945
          vf_explained_var: 0.6509382128715515
          vf_loss: 0.005810822158431013
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,392,10821.6,392000,-3.9247,-2.32,-5.36,392.47


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-21_22-07-54
  done: false
  episode_len_mean: 393.28
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.9327999999999603
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1078
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.3890980376137627
          entropy_coeff: 0.009999999999999998
          kl: 0.01774346840804605
          policy_loss: -0.08109326064586639
          total_loss: -0.08414348214864731
          vf_explained_var: 0.7805584669113159
          vf_loss: 0.0057190936019954584
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 39

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,393,10846.6,393000,-3.9328,-2.32,-5.36,393.28




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-21_22-08-40
  done: false
  episode_len_mean: 394.36
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.9435999999999605
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1081
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.416101172235277
          entropy_coeff: 0.009999999999999998
          kl: 0.012884707000500623
          policy_loss: -0.08852002148826917
          total_loss: -0.0919800022410022
          vf_explained_var: 0.7608373165130615
          vf_loss: 0.006981849931697879
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 3940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,394,10891.9,394000,-3.9436,-2.32,-5.36,394.36


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-21_22-09-06
  done: false
  episode_len_mean: 395.92
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.959199999999959
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1084
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.3496727877193027
          entropy_coeff: 0.009999999999999998
          kl: 0.011434135812122362
          policy_loss: 0.05441803832848867
          total_loss: 0.053306380907694496
          vf_explained_var: -0.03438631445169449
          vf_loss: 0.009084597944618307
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 39

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,395,10918.7,395000,-3.9592,-2.32,-5.36,395.92


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-21_22-09-36
  done: false
  episode_len_mean: 395.65
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.95649999999996
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1087
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.466530481974284
          entropy_coeff: 0.009999999999999998
          kl: 0.015238381199595372
          policy_loss: -0.06667860274513562
          total_loss: -0.07143800622887081
          vf_explained_var: 0.7940515279769897
          vf_loss: 0.005507331888657064
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 39600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,396,10948.1,396000,-3.9565,-2.32,-5.36,395.65


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-21_22-10-04
  done: false
  episode_len_mean: 398.4
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.98399999999996
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1090
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.6275327112939624
          entropy_coeff: 0.009999999999999998
          kl: 0.017214298784183294
          policy_loss: -0.04628779101702902
          total_loss: -0.05286631625559595
          vf_explained_var: 0.8291057348251343
          vf_loss: 0.004727884090971202
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 39700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,397,10976.1,397000,-3.984,-2.52,-5.36,398.4


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-21_22-10-29
  done: false
  episode_len_mean: 401.09
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -4.010899999999959
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 3
  episodes_total: 1093
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 1.6719603604740567
          entropy_coeff: 0.009999999999999998
          kl: 0.020698033137818723
          policy_loss: -0.023839137620396085
          total_loss: -0.026444232794973585
          vf_explained_var: 0.5039684772491455
          vf_loss: 0.00814001028193161
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,398,11001.1,398000,-4.0109,-2.62,-5.36,401.09


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-21_22-10-55
  done: false
  episode_len_mean: 399.37
  episode_media: {}
  episode_reward_max: -2.619999999999988
  episode_reward_mean: -3.993699999999959
  episode_reward_min: -5.35999999999993
  episodes_this_iter: 2
  episodes_total: 1095
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4329761367291214
          cur_lr: 5.000000000000001e-05
          entropy: 1.6331710232628716
          entropy_coeff: 0.009999999999999998
          kl: 0.013505706245603101
          policy_loss: -0.10825737234618929
          total_loss: -0.11060592813624276
          vf_explained_var: 0.5191429853439331
          vf_loss: 0.008135502945838704
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 3990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,399,11026.9,399000,-3.9937,-2.62,-5.36,399.37


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-21_22-11-22
  done: false
  episode_len_mean: 399.03
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.99029999999996
  episode_reward_min: -5.279999999999932
  episodes_this_iter: 3
  episodes_total: 1098
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4329761367291214
          cur_lr: 5.000000000000001e-05
          entropy: 1.3312071561813354
          entropy_coeff: 0.009999999999999998
          kl: 0.009756885246773175
          policy_loss: -0.11729112010863092
          total_loss: -0.11893166510595216
          vf_explained_var: 0.7531275153160095
          vf_loss: 0.007447028621875991
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,400,11054.2,400000,-3.9903,-2.79,-5.28,399.03


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-21_22-11-50
  done: false
  episode_len_mean: 396.34
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.963399999999959
  episode_reward_min: -5.279999999999932
  episodes_this_iter: 3
  episodes_total: 1101
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4329761367291214
          cur_lr: 5.000000000000001e-05
          entropy: 1.5281688226593866
          entropy_coeff: 0.009999999999999998
          kl: 0.011814995008175956
          policy_loss: -0.12278973346369135
          total_loss: -0.12580101461046272
          vf_explained_var: 0.7717520594596863
          vf_loss: 0.007154795889639192
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,401,11082,401000,-3.9634,-2.79,-5.28,396.34


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-21_22-12-13
  done: false
  episode_len_mean: 395.18
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.9517999999999605
  episode_reward_min: -5.279999999999932
  episodes_this_iter: 3
  episodes_total: 1104
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4329761367291214
          cur_lr: 5.000000000000001e-05
          entropy: 1.660051989555359
          entropy_coeff: 0.009999999999999998
          kl: 0.02009639559066114
          policy_loss: -0.03610424689120716
          total_loss: -0.03734600692987442
          vf_explained_var: 0.7176819443702698
          vf_loss: 0.0066575024276971815
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,402,11104.8,402000,-3.9518,-2.79,-5.28,395.18


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-21_22-12-36
  done: false
  episode_len_mean: 395.82
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.95819999999996
  episode_reward_min: -5.279999999999932
  episodes_this_iter: 2
  episodes_total: 1106
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.6152593149079217
          entropy_coeff: 0.009999999999999998
          kl: 0.01154371346467021
          policy_loss: -0.02511364064282841
          total_loss: -0.026310678157541487
          vf_explained_var: 0.651487410068512
          vf_loss: 0.007458328861846692
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 4030

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,403,11128.1,403000,-3.9582,-2.79,-5.28,395.82


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-21_22-13-06
  done: false
  episode_len_mean: 392.28
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.9227999999999605
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1109
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.5279923571480645
          entropy_coeff: 0.009999999999999998
          kl: 0.012666191587752944
          policy_loss: -0.008661607570118374
          total_loss: -0.009957802461253272
          vf_explained_var: 0.7605920433998108
          vf_loss: 0.005757489288872522
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,404,11158.1,404000,-3.9228,-2.79,-5.26,392.28




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-21_22-13-49
  done: false
  episode_len_mean: 389.75
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.8974999999999613
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1112
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.793137182129754
          entropy_coeff: 0.009999999999999998
          kl: 0.016598476173539982
          policy_loss: 0.006780349918537669
          total_loss: 0.006793334417872958
          vf_explained_var: 0.5144118666648865
          vf_loss: 0.00716424110190322
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,405,11201.3,405000,-3.8975,-2.79,-5.26,389.75


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-21_22-14-15
  done: false
  episode_len_mean: 389.19
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.8918999999999615
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 2
  episodes_total: 1114
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6494642050936816
          cur_lr: 5.000000000000001e-05
          entropy: 1.5696984105639988
          entropy_coeff: 0.009999999999999998
          kl: 0.02107269998600166
          policy_loss: -0.17205120258861117
          total_loss: -0.16724808282322354
          vf_explained_var: 0.15819677710533142
          vf_loss: 0.006814139190181676
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,406,11227.2,406000,-3.8919,-2.79,-5.26,389.19


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-21_22-14-38
  done: false
  episode_len_mean: 391.97
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.919699999999961
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1117
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.8103092537985908
          entropy_coeff: 0.009999999999999998
          kl: 0.009283671862460154
          policy_loss: 0.010093056451943186
          total_loss: 0.009929919325643115
          vf_explained_var: 0.47491776943206787
          vf_loss: 0.008895840858652566
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,407,11250,407000,-3.9197,-2.79,-5.26,391.97


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-21_22-15-04
  done: false
  episode_len_mean: 390.76
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.9075999999999613
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1120
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.6019232233365377
          entropy_coeff: 0.009999999999999998
          kl: 0.007990530397572628
          policy_loss: 0.05771012554566066
          total_loss: 0.0575630519953039
          vf_explained_var: 0.4138670265674591
          vf_loss: 0.008087815290006498
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 4080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,408,11276.4,408000,-3.9076,-2.79,-5.26,390.76


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-21_22-15-32
  done: false
  episode_len_mean: 387.88
  episode_media: {}
  episode_reward_max: -2.7899999999999845
  episode_reward_mean: -3.878799999999961
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1123
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.520823389954037
          entropy_coeff: 0.009999999999999998
          kl: 0.006645298153758805
          policy_loss: 0.09272933320866691
          total_loss: 0.08682916946709156
          vf_explained_var: 0.8092954754829407
          vf_loss: 0.0028342414970716667
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 4090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,409,11304.2,409000,-3.8788,-2.79,-5.26,387.88


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-21_22-16-00
  done: false
  episode_len_mean: 387.65
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.876499999999962
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1126
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7364676356315614
          entropy_coeff: 0.009999999999999998
          kl: 0.010694609342301181
          policy_loss: -0.02069987195233504
          total_loss: -0.02385178026225832
          vf_explained_var: 0.3945409059524536
          vf_loss: 0.0037941213942960733
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,410,11331.9,410000,-3.8765,-2.96,-5.26,387.65


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-21_22-16-25
  done: false
  episode_len_mean: 388.01
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.880099999999962
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 2
  episodes_total: 1128
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.5157760447925992
          entropy_coeff: 0.009999999999999998
          kl: 0.010690689024477134
          policy_loss: -0.10504874322149489
          total_loss: -0.10532485362556246
          vf_explained_var: 0.7032318115234375
          vf_loss: 0.0044668230900747905
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,411,11357.2,411000,-3.8801,-2.96,-5.26,388.01


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-21_22-16-52
  done: false
  episode_len_mean: 388.33
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.8832999999999624
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1131
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.497170122464498
          entropy_coeff: 0.009999999999999998
          kl: 0.008980361919019783
          policy_loss: 0.0005157628407080968
          total_loss: -0.0005216562085681492
          vf_explained_var: 0.8352591395378113
          vf_loss: 0.005185643190311061
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,412,11384.4,412000,-3.8833,-2.96,-5.26,388.33


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-21_22-17-18
  done: false
  episode_len_mean: 387.12
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.871199999999962
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1134
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.5711576342582703
          entropy_coeff: 0.009999999999999998
          kl: 0.00609474953299297
          policy_loss: 0.10412561487820414
          total_loss: 0.09960334954990281
          vf_explained_var: 0.6852079629898071
          vf_loss: 0.005251826168710573
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,413,11409.8,413000,-3.8712,-2.96,-5.26,387.12


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-21_22-17-44
  done: false
  episode_len_mean: 386.64
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.866399999999962
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1137
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.5219557841618856
          entropy_coeff: 0.009999999999999998
          kl: 0.009017301836953944
          policy_loss: -0.04734666595856349
          total_loss: -0.04308869805600908
          vf_explained_var: 0.2341344654560089
          vf_loss: 0.010692903875476785
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,414,11436,414000,-3.8664,-2.96,-5.26,386.64


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-21_22-18-09
  done: false
  episode_len_mean: 386.04
  episode_media: {}
  episode_reward_max: -2.959999999999981
  episode_reward_mean: -3.8603999999999616
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 2
  episodes_total: 1139
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.5088947839207119
          entropy_coeff: 0.009999999999999998
          kl: 0.009015391587318868
          policy_loss: -0.08357774416605632
          total_loss: -0.08432927661471896
          vf_explained_var: 0.7150930762290955
          vf_loss: 0.005554652360216197
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,415,11461.2,415000,-3.8604,-2.96,-5.26,386.04




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-21_22-18-52
  done: false
  episode_len_mean: 384.74
  episode_media: {}
  episode_reward_max: -2.8299999999999836
  episode_reward_mean: -3.8473999999999613
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1142
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7393254823154873
          entropy_coeff: 0.009999999999999998
          kl: 0.008995811958347592
          policy_loss: 0.05460517820384767
          total_loss: 0.04870737658606635
          vf_explained_var: 0.8530375361442566
          vf_loss: 0.0027317655615560297
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,416,11504.2,416000,-3.8474,-2.83,-5.26,384.74


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-21_22-19-16
  done: false
  episode_len_mean: 384.79
  episode_media: {}
  episode_reward_max: -2.8299999999999836
  episode_reward_mean: -3.847899999999963
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 2
  episodes_total: 1144
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.7232695089446173
          entropy_coeff: 0.009999999999999998
          kl: 0.014493394196347669
          policy_loss: -0.13865379575226042
          total_loss: -0.13539068198038473
          vf_explained_var: 0.7005143165588379
          vf_loss: 0.006376397629113247
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,417,11527.8,417000,-3.8479,-2.83,-5.26,384.79


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-21_22-19-42
  done: false
  episode_len_mean: 383.83
  episode_media: {}
  episode_reward_max: -2.8299999999999836
  episode_reward_mean: -3.8382999999999616
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 1147
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.5852522492408752
          entropy_coeff: 0.009999999999999998
          kl: 0.009328727105116405
          policy_loss: -0.1440320298075676
          total_loss: -0.14371977587127024
          vf_explained_var: 0.7044475674629211
          vf_loss: 0.00707676112651825
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,418,11553.5,418000,-3.8383,-2.83,-5.26,383.83


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-21_22-20-10
  done: false
  episode_len_mean: 380.33
  episode_media: {}
  episode_reward_max: -2.8299999999999836
  episode_reward_mean: -3.803299999999963
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 1150
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9741963076405227
          cur_lr: 5.000000000000001e-05
          entropy: 1.1824290302064684
          entropy_coeff: 0.009999999999999998
          kl: 0.0039791678305682076
          policy_loss: -0.07847999160488446
          total_loss: -0.07590942540102535
          vf_explained_var: 0.5657562613487244
          vf_loss: 0.010518365958705544
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,419,11581.4,419000,-3.8033,-2.83,-5.14,380.33


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-21_22-20-38
  done: false
  episode_len_mean: 377.07
  episode_media: {}
  episode_reward_max: -2.8299999999999836
  episode_reward_mean: -3.770699999999963
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 1153
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.4196227219369677
          entropy_coeff: 0.009999999999999998
          kl: 0.009463759137233692
          policy_loss: -0.002592930446068446
          total_loss: -0.002605556903613938
          vf_explained_var: 0.42003992199897766
          vf_loss: 0.009573818911384377
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,420,11609.4,420000,-3.7707,-2.83,-5.14,377.07


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-21_22-21-09
  done: false
  episode_len_mean: 375.04
  episode_media: {}
  episode_reward_max: -2.8299999999999836
  episode_reward_mean: -3.750399999999964
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 1156
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.1444092717435626
          entropy_coeff: 0.009999999999999998
          kl: 0.007804191222687212
          policy_loss: -0.10401055862506231
          total_loss: -0.09565842217869229
          vf_explained_var: 0.36695072054862976
          vf_loss: 0.015994823465330734
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,421,11640.7,421000,-3.7504,-2.83,-5.14,375.04


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-21_22-21-37
  done: false
  episode_len_mean: 368.4
  episode_media: {}
  episode_reward_max: -2.8299999999999836
  episode_reward_mean: -3.6839999999999655
  episode_reward_min: -4.939999999999939
  episodes_this_iter: 4
  episodes_total: 1160
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.2989673150910273
          entropy_coeff: 0.009999999999999998
          kl: 0.008653277122870624
          policy_loss: -0.04061622661021021
          total_loss: -0.038931510514683194
          vf_explained_var: 0.47122621536254883
          vf_loss: 0.01045939154509041
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,422,11668.7,422000,-3.684,-2.83,-4.94,368.4


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-21_22-22-06
  done: false
  episode_len_mean: 364.76
  episode_media: {}
  episode_reward_max: -2.809999999999984
  episode_reward_mean: -3.647599999999967
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 3
  episodes_total: 1163
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.225237316555447
          entropy_coeff: 0.009999999999999998
          kl: 0.008702836826098409
          policy_loss: -0.021457589831617142
          total_loss: -0.02144487574696541
          vf_explained_var: 0.6262929439544678
          vf_loss: 0.008025949643666132
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,423,11697.5,423000,-3.6476,-2.81,-4.92,364.76


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-21_22-22-36
  done: false
  episode_len_mean: 360.71
  episode_media: {}
  episode_reward_max: -2.7399999999999856
  episode_reward_mean: -3.6070999999999676
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 3
  episodes_total: 1166
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 0.9952199041843415
          entropy_coeff: 0.009999999999999998
          kl: 0.008630949778782416
          policy_loss: 0.03898694159256087
          total_loss: 0.0433150514960289
          vf_explained_var: 0.39541423320770264
          vf_loss: 0.01007618967236744
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,424,11728.1,424000,-3.6071,-2.74,-4.81,360.71


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-21_22-23-07
  done: false
  episode_len_mean: 356.23
  episode_media: {}
  episode_reward_max: -2.7399999999999856
  episode_reward_mean: -3.562299999999967
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 3
  episodes_total: 1169
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 1.156344375345442
          entropy_coeff: 0.009999999999999998
          kl: 0.00807222981902274
          policy_loss: 0.0006366989678806728
          total_loss: 0.0002690894736184014
          vf_explained_var: 0.6509788036346436
          vf_loss: 0.007263866849502342
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,425,11758.9,425000,-3.5623,-2.74,-4.81,356.23




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-21_22-23-56
  done: false
  episode_len_mean: 351.28
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.512799999999969
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 4
  episodes_total: 1173
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48709815382026134
          cur_lr: 5.000000000000001e-05
          entropy: 0.8263409621185727
          entropy_coeff: 0.009999999999999998
          kl: 0.004287530380674396
          policy_loss: 0.007660446771317058
          total_loss: 0.015215627724925677
          vf_explained_var: 0.3905709683895111
          vf_loss: 0.013730139595766862
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,426,11807.7,426000,-3.5128,-2.59,-4.81,351.28


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-21_22-24-27
  done: false
  episode_len_mean: 350.31
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.5030999999999692
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 3
  episodes_total: 1176
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 1.1870447450213961
          entropy_coeff: 0.009999999999999998
          kl: 0.011607034473406625
          policy_loss: 0.007334587474664052
          total_loss: 0.00774523549609714
          vf_explained_var: 0.5539177060127258
          vf_loss: 0.009454213656459211
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,427,11838.3,427000,-3.5031,-2.59,-4.81,350.31


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-21_22-25-01
  done: false
  episode_len_mean: 345.27
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.45269999999997
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 4
  episodes_total: 1180
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 0.7614274144172668
          entropy_coeff: 0.009999999999999998
          kl: 0.004922427422919718
          policy_loss: 0.0005965046584606171
          total_loss: 0.007272072633107503
          vf_explained_var: 0.3896647095680237
          vf_loss: 0.013090987306916052
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,428,11872.2,428000,-3.4527,-2.52,-4.81,345.27


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-21_22-25-31
  done: false
  episode_len_mean: 344.71
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.4470999999999696
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 3
  episodes_total: 1183
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 1.5523459759023455
          entropy_coeff: 0.009999999999999998
          kl: 0.02316316518752678
          policy_loss: -0.023178248604138692
          total_loss: -0.02835364821884367
          vf_explained_var: 0.15645886957645416
          vf_loss: 0.007527375733479858
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,429,11902.2,429000,-3.4471,-2.52,-4.81,344.71


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-21_22-26-02
  done: false
  episode_len_mean: 343.76
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.4375999999999705
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 3
  episodes_total: 1186
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1826618076825981
          cur_lr: 5.000000000000001e-05
          entropy: 1.2187496953540378
          entropy_coeff: 0.009999999999999998
          kl: 0.015281210031935277
          policy_loss: 0.016789075318309997
          total_loss: 0.015294231474399567
          vf_explained_var: 0.6016200184822083
          vf_loss: 0.007901361196612318
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,430,11933.1,430000,-3.4376,-2.52,-4.81,343.76


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-21_22-26-35
  done: false
  episode_len_mean: 341.3
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.412999999999971
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 4
  episodes_total: 1190
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1826618076825981
          cur_lr: 5.000000000000001e-05
          entropy: 0.9876407338513269
          entropy_coeff: 0.009999999999999998
          kl: 0.04300268733537678
          policy_loss: 0.021580598917272355
          total_loss: 0.02752041783597734
          vf_explained_var: 0.8317999243736267
          vf_loss: 0.007961281174276438
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 43100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,431,11966.9,431000,-3.413,-2.52,-4.81,341.3


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-21_22-27-05
  done: false
  episode_len_mean: 339.88
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.3987999999999716
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 3
  episodes_total: 1193
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2739927115238969
          cur_lr: 5.000000000000001e-05
          entropy: 1.0331809090243445
          entropy_coeff: 0.009999999999999998
          kl: 0.011340450838933218
          policy_loss: 0.03472166731953621
          total_loss: 0.03970944508910179
          vf_explained_var: 0.31229156255722046
          vf_loss: 0.012212382479467326
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 43

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,432,11996.9,432000,-3.3988,-2.52,-4.81,339.88


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-21_22-27-37
  done: false
  episode_len_mean: 337.12
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.371199999999972
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 3
  episodes_total: 1196
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2739927115238969
          cur_lr: 5.000000000000001e-05
          entropy: 0.8376344548331367
          entropy_coeff: 0.009999999999999998
          kl: 0.0049426997258580525
          policy_loss: -0.10571380588743422
          total_loss: -0.09941818027032746
          vf_explained_var: 0.41927745938301086
          vf_loss: 0.013317705846081178
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,433,12028.3,433000,-3.3712,-2.52,-4.81,337.12




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-21_22-28-24
  done: false
  episode_len_mean: 334.1
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.340999999999972
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 4
  episodes_total: 1200
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13699635576194846
          cur_lr: 5.000000000000001e-05
          entropy: 0.813706229130427
          entropy_coeff: 0.009999999999999998
          kl: 0.007789994898910279
          policy_loss: -0.007865459637509451
          total_loss: -0.0023662043942345515
          vf_explained_var: 0.4818163216114044
          vf_loss: 0.012569116935547854
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,434,12075.6,434000,-3.341,-2.45,-4.81,334.1


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-21_22-28-55
  done: false
  episode_len_mean: 333.27
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.3326999999999725
  episode_reward_min: -4.809999999999942
  episodes_this_iter: 3
  episodes_total: 1203
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13699635576194846
          cur_lr: 5.000000000000001e-05
          entropy: 1.4457876755131616
          entropy_coeff: 0.009999999999999998
          kl: 0.012740687669897601
          policy_loss: 0.03116162030233277
          total_loss: 0.02484204351074166
          vf_explained_var: 0.6075571775436401
          vf_loss: 0.006392868623758356
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 43

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,435,12105.9,435000,-3.3327,-2.45,-4.81,333.27


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-21_22-29-27
  done: false
  episode_len_mean: 329.28
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.2927999999999735
  episode_reward_min: -4.329999999999952
  episodes_this_iter: 3
  episodes_total: 1206
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13699635576194846
          cur_lr: 5.000000000000001e-05
          entropy: 0.9561462978521983
          entropy_coeff: 0.009999999999999998
          kl: 0.02084809448773248
          policy_loss: -0.12689340429173576
          total_loss: -0.12236380709542169
          vf_explained_var: 0.5563479661941528
          vf_loss: 0.011234945576224063
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,436,12138.8,436000,-3.2928,-2.45,-4.33,329.28


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-21_22-29-59
  done: false
  episode_len_mean: 327.45
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.2744999999999744
  episode_reward_min: -4.329999999999952
  episodes_this_iter: 4
  episodes_total: 1210
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20549453364292275
          cur_lr: 5.000000000000001e-05
          entropy: 1.1651942100789812
          entropy_coeff: 0.009999999999999998
          kl: 0.011809656292446943
          policy_loss: 0.01266386496524016
          total_loss: 0.011169285823901495
          vf_explained_var: 0.5958056449890137
          vf_loss: 0.007730548091543218
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,437,12170.8,437000,-3.2745,-2.45,-4.33,327.45


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-21_22-30-33
  done: false
  episode_len_mean: 324.25
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.242499999999975
  episode_reward_min: -4.329999999999952
  episodes_this_iter: 3
  episodes_total: 1213
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20549453364292275
          cur_lr: 5.000000000000001e-05
          entropy: 1.15067683590783
          entropy_coeff: 0.009999999999999998
          kl: 0.019228333749869965
          policy_loss: -0.1234299456079801
          total_loss: -0.11593474853369924
          vf_explained_var: 0.4088244140148163
          vf_loss: 0.015050650263826052
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 4380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,438,12203.9,438000,-3.2425,-2.45,-4.33,324.25


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-21_22-31-04
  done: false
  episode_len_mean: 320.5
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.204999999999975
  episode_reward_min: -4.329999999999952
  episodes_this_iter: 4
  episodes_total: 1217
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20549453364292275
          cur_lr: 5.000000000000001e-05
          entropy: 1.2980803668498992
          entropy_coeff: 0.009999999999999998
          kl: 0.05308394840052026
          policy_loss: -0.023279474965400167
          total_loss: -0.012553546701868375
          vf_explained_var: 0.4393017292022705
          vf_loss: 0.01279827132821083
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 43

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,439,12235,439000,-3.205,-2.45,-4.33,320.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-21_22-31-35
  done: false
  episode_len_mean: 319.94
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.1993999999999754
  episode_reward_min: -4.329999999999952
  episodes_this_iter: 3
  episodes_total: 1220
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3082418004643842
          cur_lr: 5.000000000000001e-05
          entropy: 1.6722670396169026
          entropy_coeff: 0.009999999999999998
          kl: 0.024902167510962296
          policy_loss: -0.0109074166458514
          total_loss: -0.014194357146819433
          vf_explained_var: 0.36409637331962585
          vf_loss: 0.005759838343753169
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,440,12266,440000,-3.1994,-2.45,-4.33,319.94


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-21_22-32-05
  done: false
  episode_len_mean: 317.97
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.179699999999976
  episode_reward_min: -4.329999999999952
  episodes_this_iter: 3
  episodes_total: 1223
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46236270069657603
          cur_lr: 5.000000000000001e-05
          entropy: 1.0016867644257015
          entropy_coeff: 0.009999999999999998
          kl: 0.005376563818115758
          policy_loss: -0.10338095103700956
          total_loss: -0.09521839179926449
          vf_explained_var: 0.28843963146209717
          vf_loss: 0.01569350299735864
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,441,12296.3,441000,-3.1797,-2.45,-4.33,317.97


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-21_22-32-33
  done: false
  episode_len_mean: 318.05
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.1804999999999755
  episode_reward_min: -4.329999999999952
  episodes_this_iter: 3
  episodes_total: 1226
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46236270069657603
          cur_lr: 5.000000000000001e-05
          entropy: 1.7208070000012716
          entropy_coeff: 0.009999999999999998
          kl: 0.012531063640388614
          policy_loss: -0.01702045632733239
          total_loss: -0.01754109346204334
          vf_explained_var: 0.2029591202735901
          vf_loss: 0.010893535466554265
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,442,12324,442000,-3.1805,-2.45,-4.33,318.05


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-21_22-33-04
  done: false
  episode_len_mean: 316.38
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.1637999999999766
  episode_reward_min: -4.329999999999952
  episodes_this_iter: 3
  episodes_total: 1229
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46236270069657603
          cur_lr: 5.000000000000001e-05
          entropy: 1.2772181835439471
          entropy_coeff: 0.009999999999999998
          kl: 0.009744214735453562
          policy_loss: -0.11493428266710705
          total_loss: -0.11391929313540458
          vf_explained_var: 0.632897675037384
          vf_loss: 0.00928180881568955
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 44

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,443,12355.1,443000,-3.1638,-2.45,-4.33,316.38




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-21_22-33-52
  done: false
  episode_len_mean: 316.23
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.162299999999977
  episode_reward_min: -4.329999999999952
  episodes_this_iter: 3
  episodes_total: 1232
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46236270069657603
          cur_lr: 5.000000000000001e-05
          entropy: 1.7661660101678636
          entropy_coeff: 0.009999999999999998
          kl: 0.016520461960052933
          policy_loss: -0.10122596240705914
          total_loss: -0.10405580202738444
          vf_explained_var: 0.8095064759254456
          vf_loss: 0.007193375914357603
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,444,12403.2,444000,-3.1623,-2.45,-4.33,316.23


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-21_22-34-24
  done: false
  episode_len_mean: 313.16
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.131599999999977
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 1236
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46236270069657603
          cur_lr: 5.000000000000001e-05
          entropy: 1.2056650982962713
          entropy_coeff: 0.009999999999999998
          kl: 0.010494582102834235
          policy_loss: -0.0650447525911861
          total_loss: -0.059938069101836945
          vf_explained_var: 0.6174274682998657
          vf_loss: 0.01231103129684925
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 44

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,445,12435.1,445000,-3.1316,-2.45,-4.24,313.16


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-21_22-34-53
  done: false
  episode_len_mean: 312.22
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.1221999999999768
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 1239
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46236270069657603
          cur_lr: 5.000000000000001e-05
          entropy: 1.746115861998664
          entropy_coeff: 0.009999999999999998
          kl: 0.020483428074177665
          policy_loss: -0.04089505275090535
          total_loss: -0.040331804255644484
          vf_explained_var: 0.6039376854896545
          vf_loss: 0.008553629989425342
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,446,12464.4,446000,-3.1222,-2.45,-4.24,312.22


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-21_22-35-22
  done: false
  episode_len_mean: 311.78
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.117799999999978
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 2
  episodes_total: 1241
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6935440510448645
          cur_lr: 5.000000000000001e-05
          entropy: 1.6600804381900363
          entropy_coeff: 0.009999999999999998
          kl: 0.022143036372251897
          policy_loss: -0.13830969168080223
          total_loss: -0.13460495852761797
          vf_explained_var: 0.8990553021430969
          vf_loss: 0.004948367875638521
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 44

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,447,12492.9,447000,-3.1178,-2.45,-4.24,311.78


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-21_22-35-53
  done: false
  episode_len_mean: 309.37
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0936999999999784
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 4
  episodes_total: 1245
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.5153109749158225
          entropy_coeff: 0.009999999999999998
          kl: 0.008331229300657918
          policy_loss: -0.03489912967714998
          total_loss: -0.03475940856668684
          vf_explained_var: 0.837005078792572
          vf_loss: 0.006625713224315809
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 44

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,448,12524.2,448000,-3.0937,-2.45,-4.18,309.37


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-21_22-36-20
  done: false
  episode_len_mean: 308.69
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.086899999999978
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 3
  episodes_total: 1248
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.608872860007816
          entropy_coeff: 0.009999999999999998
          kl: 0.007461438892175288
          policy_loss: -0.09532445983754265
          total_loss: -0.09834218546748161
          vf_explained_var: 0.8581292033195496
          vf_loss: 0.005308753360683719
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,449,12550.7,449000,-3.0869,-2.45,-4.18,308.69


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-21_22-36-51
  done: false
  episode_len_mean: 308.1
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.080999999999978
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 3
  episodes_total: 1251
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.6004736688401964
          entropy_coeff: 0.009999999999999998
          kl: 0.010814895007101817
          policy_loss: 0.07196189736326536
          total_loss: 0.07072077501151297
          vf_explained_var: 0.8271591067314148
          vf_loss: 0.003512704903389224
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 45000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,450,12581.5,450000,-3.081,-2.45,-4.18,308.1


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-21_22-37-22
  done: false
  episode_len_mean: 307.35
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.073499999999979
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 3
  episodes_total: 1254
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.4980518672201368
          entropy_coeff: 0.009999999999999998
          kl: 0.009928748602917972
          policy_loss: 0.09825575066109499
          total_loss: 0.09679535436961385
          vf_explained_var: 0.9297043085098267
          vf_loss: 0.003191084582229248
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 4510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,451,12613,451000,-3.0735,-2.45,-4.18,307.35


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-21_22-37-50
  done: false
  episode_len_mean: 308.58
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.085799999999978
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 3
  episodes_total: 1257
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.792238676548004
          entropy_coeff: 0.009999999999999998
          kl: 0.009981452925648348
          policy_loss: 0.022142603165573545
          total_loss: 0.022296892437669965
          vf_explained_var: 0.7265974283218384
          vf_loss: 0.00769281233014125
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 4520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,452,12640.8,452000,-3.0858,-2.45,-4.18,308.58




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-21_22-38-39
  done: false
  episode_len_mean: 307.26
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0725999999999782
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 3
  episodes_total: 1260
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.0173111617565156
          entropy_coeff: 0.009999999999999998
          kl: 0.006033162018452703
          policy_loss: -0.0962620457013448
          total_loss: -0.09264104863007863
          vf_explained_var: 0.8282946348190308
          vf_loss: 0.007517710333276126
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 45

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,453,12690.2,453000,-3.0726,-2.45,-4.18,307.26


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-10-21_22-39-13
  done: false
  episode_len_mean: 306.5
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.064999999999978
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 4
  episodes_total: 1264
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.2559118098682827
          entropy_coeff: 0.009999999999999998
          kl: 0.007002751145118364
          policy_loss: -0.04059258119927512
          total_loss: -0.039012231884731186
          vf_explained_var: 0.7982091903686523
          vf_loss: 0.0068543953510622185
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,454,12724.3,454000,-3.065,-2.45,-4.18,306.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-10-21_22-39-44
  done: false
  episode_len_mean: 306.55
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.065499999999978
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 3
  episodes_total: 1267
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.1336669637097252
          entropy_coeff: 0.009999999999999998
          kl: 0.00588335875159858
          policy_loss: 0.09427313949498865
          total_loss: 0.09325167689886359
          vf_explained_var: 0.809240996837616
          vf_loss: 0.00419465494212798
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,455,12754.7,455000,-3.0655,-2.45,-4.18,306.55


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-10-21_22-40-17
  done: false
  episode_len_mean: 306.58
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.065799999999979
  episode_reward_min: -4.179999999999955
  episodes_this_iter: 3
  episodes_total: 1270
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.055843115515179
          entropy_coeff: 0.009999999999999998
          kl: 0.0054899317385560115
          policy_loss: -0.1469858433637354
          total_loss: -0.14310964287983047
          vf_explained_var: 0.680291473865509
          vf_loss: 0.00872336721772121
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 45600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,456,12788.1,456000,-3.0658,-2.45,-4.18,306.58


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-10-21_22-40-44
  done: false
  episode_len_mean: 308.44
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.084399999999978
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1273
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.6289387497637007
          entropy_coeff: 0.009999999999999998
          kl: 0.013979418236993071
          policy_loss: -0.1056510360704528
          total_loss: -0.09956281259655952
          vf_explained_var: 0.5968689322471619
          vf_loss: 0.007834596342096727
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 45

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,457,12814.4,457000,-3.0844,-2.45,-4.45,308.44


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-10-21_22-41-15
  done: false
  episode_len_mean: 308.02
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0801999999999783
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1277
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403160765672967
          cur_lr: 5.000000000000001e-05
          entropy: 1.1583998017840915
          entropy_coeff: 0.009999999999999998
          kl: 0.004835823292180165
          policy_loss: -0.005378632992506027
          total_loss: -0.0029100999236106873
          vf_explained_var: 0.6193836331367493
          vf_loss: 0.009021747458933128
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,458,12845.6,458000,-3.0802,-2.45,-4.45,308.02


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-10-21_22-41-46
  done: false
  episode_len_mean: 308.88
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.088799999999978
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1280
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 1.2124769614802466
          entropy_coeff: 0.009999999999999998
          kl: 0.012880704732044279
          policy_loss: 0.026725071999761794
          total_loss: 0.027456378440062205
          vf_explained_var: 0.6564803123474121
          vf_loss: 0.006156075782039099
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,459,12877,459000,-3.0888,-2.45,-4.45,308.88


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-10-21_22-42-17
  done: false
  episode_len_mean: 308.57
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.085699999999978
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1283
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 1.1622080915504032
          entropy_coeff: 0.009999999999999998
          kl: 0.00651159349274053
          policy_loss: -0.08490367945697573
          total_loss: -0.08286917573875852
          vf_explained_var: 0.5212703943252563
          vf_loss: 0.010269523484425412
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 46

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,460,12907.8,460000,-3.0857,-2.45,-4.45,308.57


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-10-21_22-42-50
  done: false
  episode_len_mean: 308.38
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.083799999999978
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1287
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 1.313297290272183
          entropy_coeff: 0.009999999999999998
          kl: 0.008612915569449974
          policy_loss: -0.05453703800837199
          total_loss: -0.055685503780841826
          vf_explained_var: 0.7265231609344482
          vf_loss: 0.0075044304235941835
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,461,12940.7,461000,-3.0838,-2.45,-4.45,308.38




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-10-21_22-43-38
  done: false
  episode_len_mean: 308.43
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0842999999999785
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1290
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 1.1368724279933506
          entropy_coeff: 0.009999999999999998
          kl: 0.006232739143350546
          policy_loss: -0.004987177666690615
          total_loss: -0.007126401613156001
          vf_explained_var: 0.7509344220161438
          vf_loss: 0.00598749163457089
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,462,12988.7,462000,-3.0843,-2.45,-4.45,308.43


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-10-21_22-44-13
  done: false
  episode_len_mean: 308.35
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0834999999999777
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1294
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 1.0575324177742005
          entropy_coeff: 0.009999999999999998
          kl: 0.005623848279761192
          policy_loss: -0.05300512421462271
          total_loss: -0.05208182980616887
          vf_explained_var: 0.7046127915382385
          vf_loss: 0.008573328170718418
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,463,13024,463000,-3.0835,-2.45,-4.45,308.35


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-10-21_22-44-45
  done: false
  episode_len_mean: 307.72
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0771999999999777
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1297
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 0.7865579624970754
          entropy_coeff: 0.009999999999999998
          kl: 0.007205369935808805
          policy_loss: -0.03596275274952253
          total_loss: -0.033643842488527295
          vf_explained_var: 0.7701460123062134
          vf_loss: 0.006436556770414528
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,464,13055.5,464000,-3.0772,-2.45,-4.45,307.72


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-10-21_22-45-18
  done: false
  episode_len_mean: 307.57
  episode_media: {}
  episode_reward_max: -2.52999999999999
  episode_reward_mean: -3.075699999999978
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1301
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5201580382836484
          cur_lr: 5.000000000000001e-05
          entropy: 0.8639602886305915
          entropy_coeff: 0.009999999999999998
          kl: 0.0035652856918110686
          policy_loss: -0.0380814082092709
          total_loss: -0.035967153931657475
          vf_explained_var: 0.6721564531326294
          vf_loss: 0.00889934596295158
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,465,13089,465000,-3.0757,-2.53,-4.45,307.57


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-10-21_22-45-52
  done: false
  episode_len_mean: 306.65
  episode_media: {}
  episode_reward_max: -2.52999999999999
  episode_reward_mean: -3.0664999999999787
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1304
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2600790191418242
          cur_lr: 5.000000000000001e-05
          entropy: 0.732845601770613
          entropy_coeff: 0.009999999999999998
          kl: 0.0047103773127485085
          policy_loss: 0.041827336533202066
          total_loss: 0.042367727309465406
          vf_explained_var: 0.6507368087768555
          vf_loss: 0.006643776688724757
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 46

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,466,13122.2,466000,-3.0665,-2.53,-4.45,306.65


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-10-21_22-46-26
  done: false
  episode_len_mean: 305.66
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.0565999999999787
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1308
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1300395095709121
          cur_lr: 5.000000000000001e-05
          entropy: 0.7269428829352061
          entropy_coeff: 0.009999999999999998
          kl: 0.0046046379697041095
          policy_loss: 0.009234965137309498
          total_loss: 0.012644903692934249
          vf_explained_var: 0.5642585754394531
          vf_loss: 0.010080581510232554
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,467,13156.2,467000,-3.0566,-2.44,-4.45,305.66


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-10-21_22-46-59
  done: false
  episode_len_mean: 306.18
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.061799999999978
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1311
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06501975478545605
          cur_lr: 5.000000000000001e-05
          entropy: 1.1974327087402343
          entropy_coeff: 0.009999999999999998
          kl: 0.017803939710285217
          policy_loss: -0.08530656728479598
          total_loss: -0.08592209741473197
          vf_explained_var: 0.5506964325904846
          vf_loss: 0.01020118647461964
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 46

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,468,13190,468000,-3.0618,-2.44,-4.45,306.18


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-10-21_22-47-32
  done: false
  episode_len_mean: 305.83
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.0582999999999787
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1315
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06501975478545605
          cur_lr: 5.000000000000001e-05
          entropy: 0.7020716276433733
          entropy_coeff: 0.009999999999999998
          kl: 0.00845798086247426
          policy_loss: 0.034832861440049274
          total_loss: 0.03753267568018701
          vf_explained_var: 0.6805333495140076
          vf_loss: 0.009170594945963886
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 46

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,469,13222.2,469000,-3.0583,-2.44,-4.45,305.83


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-10-21_22-48-07
  done: false
  episode_len_mean: 303.79
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.0378999999999787
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1319
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06501975478545605
          cur_lr: 5.000000000000001e-05
          entropy: 0.6556137323379516
          entropy_coeff: 0.009999999999999998
          kl: 0.006430490990135398
          policy_loss: -0.006381729286577966
          total_loss: -0.0021518821517626445
          vf_explained_var: 0.6023604869842529
          vf_loss: 0.010367876963896884
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,470,13257,470000,-3.0379,-2.44,-4.45,303.79




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-10-21_22-48-56
  done: false
  episode_len_mean: 304.11
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.0410999999999797
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1322
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06501975478545605
          cur_lr: 5.000000000000001e-05
          entropy: 1.039090249935786
          entropy_coeff: 0.009999999999999998
          kl: 0.043384856702173895
          policy_loss: 0.036822271015908986
          total_loss: 0.03643569598595301
          vf_explained_var: 0.7075374126434326
          vf_loss: 0.007183455666139101
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 47

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,471,13306,471000,-3.0411,-2.44,-4.45,304.11


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-10-21_22-49-29
  done: false
  episode_len_mean: 301.86
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.0185999999999797
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1326
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.841186934709549
          entropy_coeff: 0.009999999999999998
          kl: 0.0075147580930414095
          policy_loss: -0.018629476345247694
          total_loss: -0.018223907550175986
          vf_explained_var: 0.6575758457183838
          vf_loss: 0.008084527801515327
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,472,13339,472000,-3.0186,-2.44,-4.45,301.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-10-21_22-50-02
  done: false
  episode_len_mean: 300.99
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.009899999999979
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1329
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.9423380335172017
          entropy_coeff: 0.009999999999999998
          kl: 0.011421880960405102
          policy_loss: 0.034606580519013934
          total_loss: 0.033546061482694414
          vf_explained_var: 0.6750606298446655
          vf_loss: 0.007248884671005524
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,473,13372.6,473000,-3.0099,-2.44,-4.45,300.99


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-10-21_22-50-37
  done: false
  episode_len_mean: 298.89
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.98889999999998
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1333
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.7938600076569451
          entropy_coeff: 0.009999999999999998
          kl: 0.010791373588624253
          policy_loss: -0.021786310896277427
          total_loss: -0.017982562548584408
          vf_explained_var: 0.5424261093139648
          vf_loss: 0.010689872037619352
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,474,13407.6,474000,-2.9889,-2.44,-4.45,298.89


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-10-21_22-51-11
  done: false
  episode_len_mean: 298.32
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.98319999999998
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1336
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.5672854161924786
          entropy_coeff: 0.009999999999999998
          kl: 0.008725321354629835
          policy_loss: -0.005631541005439228
          total_loss: -0.0024880739135874643
          vf_explained_var: 0.5254338979721069
          vf_loss: 0.007965344651084807
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,475,13441.6,475000,-2.9832,-2.44,-4.45,298.32


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-10-21_22-51-44
  done: false
  episode_len_mean: 297.34
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.97339999999998
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1339
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.7225643191072676
          entropy_coeff: 0.009999999999999998
          kl: 0.014720081283933853
          policy_loss: -0.1487306756277879
          total_loss: -0.1423749460114373
          vf_explained_var: 0.39942067861557007
          vf_loss: 0.01214572699326608
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 4760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,476,13474,476000,-2.9734,-2.44,-4.45,297.34


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-10-21_22-52-19
  done: false
  episode_len_mean: 295.12
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9511999999999805
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1343
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.4541748176018397
          entropy_coeff: 0.009999999999999998
          kl: 0.005054678426167432
          policy_loss: -0.034677093972762425
          total_loss: -0.027394323299328487
          vf_explained_var: 0.3973146080970764
          vf_loss: 0.011331538949161769
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,477,13509.2,477000,-2.9512,-2.44,-4.45,295.12


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-10-21_22-52-54
  done: false
  episode_len_mean: 292.57
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.9256999999999813
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1347
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.49697572224669984
          entropy_coeff: 0.009999999999999998
          kl: 0.00836815733988598
          policy_loss: 0.012218754324648116
          total_loss: 0.019482154150803885
          vf_explained_var: 0.42362070083618164
          vf_loss: 0.011417011057751046
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,478,13543.9,478000,-2.9257,-2.44,-4.45,292.57




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-10-21_22-53-47
  done: false
  episode_len_mean: 289.74
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.8973999999999815
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1351
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.6459424952665965
          entropy_coeff: 0.009999999999999998
          kl: 0.008772049288100605
          policy_loss: 0.023172561327616373
          total_loss: 0.02844389950235685
          vf_explained_var: 0.44378793239593506
          vf_loss: 0.010875225734586517
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,479,13596.8,479000,-2.8974,-2.34,-4.45,289.74


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-10-21_22-54-22
  done: false
  episode_len_mean: 288.0
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.8799999999999826
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1355
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.6422377202245925
          entropy_coeff: 0.009999999999999998
          kl: 0.007847468416715249
          policy_loss: 0.005858300543493695
          total_loss: 0.012699977142943276
          vf_explained_var: 0.4086560904979706
          vf_loss: 0.012498693075031042
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,480,13632.7,480000,-2.88,-2.34,-4.45,288


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-10-21_22-54-59
  done: false
  episode_len_mean: 285.82
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.8581999999999823
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1358
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.6909684816996257
          entropy_coeff: 0.009999999999999998
          kl: 0.014355215859639496
          policy_loss: -0.10983141544792387
          total_loss: -0.10311220337947209
          vf_explained_var: 0.40127304196357727
          vf_loss: 0.012228838964882824
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,481,13669.4,481000,-2.8582,-2.34,-4.45,285.82


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-10-21_22-55-36
  done: false
  episode_len_mean: 284.71
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.847099999999983
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1362
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09752963217818403
          cur_lr: 5.000000000000001e-05
          entropy: 0.604940465092659
          entropy_coeff: 0.009999999999999998
          kl: 0.023152161060429825
          policy_loss: -0.017967298006018
          total_loss: -0.011019663761059444
          vf_explained_var: 0.426461398601532
          vf_loss: 0.010739016657074293
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 4820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,482,13706.5,482000,-2.8471,-2.34,-4.45,284.71


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-10-21_22-56-10
  done: false
  episode_len_mean: 283.35
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.833499999999984
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 1366
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1462944482672761
          cur_lr: 5.000000000000001e-05
          entropy: 0.5293343805604511
          entropy_coeff: 0.009999999999999998
          kl: 0.007653589512885987
          policy_loss: 0.021754684588975375
          total_loss: 0.029176701770888436
          vf_explained_var: 0.37863603234291077
          vf_loss: 0.011595683053342833
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,483,13739.8,483000,-2.8335,-2.34,-4.45,283.35


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-10-21_22-56-44
  done: false
  episode_len_mean: 282.61
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.826099999999983
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 3
  episodes_total: 1369
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1462944482672761
          cur_lr: 5.000000000000001e-05
          entropy: 0.5498123447100322
          entropy_coeff: 0.009999999999999998
          kl: 0.00572264485359503
          policy_loss: -0.10850717566079564
          total_loss: -0.10191965450843175
          vf_explained_var: 0.43948906660079956
          vf_loss: 0.011248453365017971
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,484,13774.6,484000,-2.8261,-2.34,-4.45,282.61


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-10-21_22-57-18
  done: false
  episode_len_mean: 280.59
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.8058999999999834
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 1373
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1462944482672761
          cur_lr: 5.000000000000001e-05
          entropy: 0.5856406122446061
          entropy_coeff: 0.009999999999999998
          kl: 0.012942933148203531
          policy_loss: 0.016022122320201664
          total_loss: 0.021258702418870398
          vf_explained_var: 0.4905703365802765
          vf_loss: 0.009199506002995702
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,485,13808.1,485000,-2.8059,-2.34,-3.74,280.59


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-10-21_22-57-42
  done: false
  episode_len_mean: 282.6
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.8259999999999836
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 3
  episodes_total: 1376
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1462944482672761
          cur_lr: 5.000000000000001e-05
          entropy: 1.1915424194600848
          entropy_coeff: 0.009999999999999998
          kl: 0.05031980375667224
          policy_loss: -0.001131073468261295
          total_loss: 0.0016084965732362534
          vf_explained_var: 0.18295754492282867
          vf_loss: 0.007293487232851071
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,486,13831.9,486000,-2.826,-2.34,-4.2,282.6




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-10-21_22-58-33
  done: false
  episode_len_mean: 281.5
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8149999999999835
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 4
  episodes_total: 1380
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21944167240091414
          cur_lr: 5.000000000000001e-05
          entropy: 0.6324451363748974
          entropy_coeff: 0.009999999999999998
          kl: 0.007642684901623155
          policy_loss: 0.007905528901351823
          total_loss: 0.01227438992096318
          vf_explained_var: 0.6358770728111267
          vf_loss: 0.009016190386480756
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,487,13882.8,487000,-2.815,-2.29,-4.2,281.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-10-21_22-59-08
  done: false
  episode_len_mean: 280.6
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8059999999999845
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 3
  episodes_total: 1383
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21944167240091414
          cur_lr: 5.000000000000001e-05
          entropy: 0.6775808248254988
          entropy_coeff: 0.009999999999999998
          kl: 0.011300215271690128
          policy_loss: 0.011234187955657641
          total_loss: 0.014794287582238516
          vf_explained_var: 0.6108174920082092
          vf_loss: 0.007856171957812168
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,488,13918.1,488000,-2.806,-2.29,-4.2,280.6


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-10-21_22-59-45
  done: false
  episode_len_mean: 278.93
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.789299999999985
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 4
  episodes_total: 1387
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21944167240091414
          cur_lr: 5.000000000000001e-05
          entropy: 0.4274478766653273
          entropy_coeff: 0.009999999999999998
          kl: 0.009155577224603192
          policy_loss: 0.046115663399298984
          total_loss: 0.05236618493994077
          vf_explained_var: 0.501542329788208
          vf_loss: 0.008515886641624901
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 4890

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,489,13955.3,489000,-2.7893,-2.29,-4.2,278.93


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-10-21_23-00-22
  done: false
  episode_len_mean: 277.32
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.773199999999985
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 4
  episodes_total: 1391
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21944167240091414
          cur_lr: 5.000000000000001e-05
          entropy: 0.6038019822703468
          entropy_coeff: 0.009999999999999998
          kl: 0.007701151812080238
          policy_loss: 0.021938034809297985
          total_loss: 0.027500049852662616
          vf_explained_var: 0.4876991808414459
          vf_loss: 0.009910079133179452
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,490,13991.6,490000,-2.7732,-2.29,-4.2,277.32


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-10-21_23-00-59
  done: false
  episode_len_mean: 276.3
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.762999999999985
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 4
  episodes_total: 1395
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21944167240091414
          cur_lr: 5.000000000000001e-05
          entropy: 0.5586050907770793
          entropy_coeff: 0.009999999999999998
          kl: 0.004568163129861662
          policy_loss: 0.019791207172804408
          total_loss: 0.02702044054037995
          vf_explained_var: 0.3240925073623657
          vf_loss: 0.011812841654237774
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 4910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,491,14029.2,491000,-2.763,-2.29,-4.2,276.3


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-10-21_23-01-34
  done: false
  episode_len_mean: 275.72
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7571999999999846
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 4
  episodes_total: 1399
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10972083620045707
          cur_lr: 5.000000000000001e-05
          entropy: 0.6071177353461583
          entropy_coeff: 0.009999999999999998
          kl: 0.00745189107630853
          policy_loss: -0.05298366240329212
          total_loss: -0.04839641793320577
          vf_explained_var: 0.4673498868942261
          vf_loss: 0.009840792671052947
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,492,14064.3,492000,-2.7572,-2.29,-4.2,275.72


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-10-21_23-02-10
  done: false
  episode_len_mean: 274.73
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.747299999999985
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 4
  episodes_total: 1403
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10972083620045707
          cur_lr: 5.000000000000001e-05
          entropy: 0.49502877824836305
          entropy_coeff: 0.009999999999999998
          kl: 0.004630142446339312
          policy_loss: -0.02031034462981754
          total_loss: -0.013736575593551
          vf_explained_var: 0.31556302309036255
          vf_loss: 0.011016033227658935
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,493,14100.4,493000,-2.7473,-2.29,-4.2,274.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-10-21_23-02-48
  done: false
  episode_len_mean: 274.04
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.740399999999986
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 3
  episodes_total: 1406
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.054860418100228535
          cur_lr: 5.000000000000001e-05
          entropy: 0.4990555375814438
          entropy_coeff: 0.009999999999999998
          kl: 0.005005079129668862
          policy_loss: -0.07411663557092349
          total_loss: -0.0670279539293713
          vf_explained_var: 0.3047462999820709
          vf_loss: 0.011804652358922694
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,494,14137.8,494000,-2.7404,-2.29,-4.2,274.04




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-10-21_23-03-44
  done: false
  episode_len_mean: 272.13
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.721299999999985
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 5
  episodes_total: 1411
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.054860418100228535
          cur_lr: 5.000000000000001e-05
          entropy: 0.4465164926317003
          entropy_coeff: 0.009999999999999998
          kl: 0.004024310572562149
          policy_loss: -0.020306114148762492
          total_loss: -0.014120872774057918
          vf_explained_var: 0.5290691256523132
          vf_loss: 0.010429631525443659
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,495,14194.3,495000,-2.7213,-2.15,-4.2,272.13


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-10-21_23-04-20
  done: false
  episode_len_mean: 272.13
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7212999999999865
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 3
  episodes_total: 1414
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.027430209050114267
          cur_lr: 5.000000000000001e-05
          entropy: 0.625793566637569
          entropy_coeff: 0.009999999999999998
          kl: 0.020191960832465602
          policy_loss: -0.06452685064739651
          total_loss: -0.06044889423582289
          vf_explained_var: -0.08266779780387878
          vf_loss: 0.009782023047510949
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,496,14229.6,496000,-2.7213,-2.15,-4.2,272.13


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-10-21_23-04-58
  done: false
  episode_len_mean: 270.78
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7077999999999856
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 4
  episodes_total: 1418
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04114531357517139
          cur_lr: 5.000000000000001e-05
          entropy: 0.5636664681964451
          entropy_coeff: 0.009999999999999998
          kl: 0.042328828384842684
          policy_loss: -0.017109900464614233
          total_loss: -0.010756391369634204
          vf_explained_var: 0.5207743048667908
          vf_loss: 0.01024853995639003
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,497,14267.9,497000,-2.7078,-2.15,-4.2,270.78


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-10-21_23-05-25
  done: false
  episode_len_mean: 271.69
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7168999999999857
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 3
  episodes_total: 1421
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0617179703627571
          cur_lr: 5.000000000000001e-05
          entropy: 1.240935620996687
          entropy_coeff: 0.009999999999999998
          kl: 0.04730445703171024
          policy_loss: -0.14671271153622203
          total_loss: -0.14454963621166017
          vf_explained_var: 0.3317853808403015
          vf_loss: 0.011652901722118258
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 4980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,498,14295,498000,-2.7169,-2.15,-4.2,271.69


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-10-21_23-05-54
  done: false
  episode_len_mean: 273.63
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7362999999999857
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1424
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09257695554413563
          cur_lr: 5.000000000000001e-05
          entropy: 0.5213571333222919
          entropy_coeff: 0.009999999999999998
          kl: 0.03166826464160583
          policy_loss: -0.09905071987046135
          total_loss: -0.07895028707053926
          vf_explained_var: -0.11926410347223282
          vf_loss: 0.022382253718872865
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,499,14324.1,499000,-2.7363,-2.15,-5.65,273.63


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-10-21_23-06-27
  done: false
  episode_len_mean: 273.41
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.734099999999985
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1428
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.9618497312068939
          entropy_coeff: 0.009999999999999998
          kl: 0.027945956241017377
          policy_loss: 0.03749265070590708
          total_loss: 0.04063266314980057
          vf_explained_var: 0.22141647338867188
          vf_loss: 0.008877780777402223
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,500,14357.3,500000,-2.7341,-2.15,-5.65,273.41


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-10-21_23-07-00
  done: false
  episode_len_mean: 273.86
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7385999999999844
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1431
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2082981499743051
          cur_lr: 5.000000000000001e-05
          entropy: 0.987537956237793
          entropy_coeff: 0.009999999999999998
          kl: 0.013319992101869557
          policy_loss: -0.023529274927245245
          total_loss: -0.020871039728323618
          vf_explained_var: 0.0771874189376831
          vf_loss: 0.009759085669389201
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,501,14389.7,501000,-2.7386,-2.15,-5.65,273.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-10-21_23-07-33
  done: false
  episode_len_mean: 273.35
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7334999999999847
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1435
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2082981499743051
          cur_lr: 5.000000000000001e-05
          entropy: 0.6690354294247097
          entropy_coeff: 0.009999999999999998
          kl: 0.006421491285599343
          policy_loss: 0.044956319779157636
          total_loss: 0.04659163032968839
          vf_explained_var: 0.41713473200798035
          vf_loss: 0.006988078003956212
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 50

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,502,14422.5,502000,-2.7335,-2.15,-5.65,273.35


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-10-21_23-08-02
  done: false
  episode_len_mean: 273.83
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.738299999999985
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1438
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2082981499743051
          cur_lr: 5.000000000000001e-05
          entropy: 1.2447051286697388
          entropy_coeff: 0.009999999999999998
          kl: 0.012896288125019436
          policy_loss: 0.005206187731689877
          total_loss: 0.0015212610363960267
          vf_explained_var: 0.46092361211776733
          vf_loss: 0.006075853265226922
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,503,14452.1,503000,-2.7383,-2.15,-5.65,273.83




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-10-21_23-08-53
  done: false
  episode_len_mean: 275.2
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.751999999999985
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1441
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2082981499743051
          cur_lr: 5.000000000000001e-05
          entropy: 1.430640086862776
          entropy_coeff: 0.009999999999999998
          kl: 0.04802550650893635
          policy_loss: 0.0021254846619235147
          total_loss: 0.0020953266157044304
          vf_explained_var: 0.7587254643440247
          vf_loss: 0.004272616664982505
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 5040

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,504,14502.9,504000,-2.752,-2.15,-5.65,275.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-10-21_23-09-27
  done: false
  episode_len_mean: 274.77
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.747699999999985
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1445
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3124472249614577
          cur_lr: 5.000000000000001e-05
          entropy: 0.6618993765778012
          entropy_coeff: 0.009999999999999998
          kl: 0.006408153777785626
          policy_loss: 7.1696937084197995e-06
          total_loss: 0.009589574734369914
          vf_explained_var: 0.24694472551345825
          vf_loss: 0.014199186478637986
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,505,14537,505000,-2.7477,-2.15,-5.65,274.77


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-10-21_23-09-59
  done: false
  episode_len_mean: 276.69
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.766899999999985
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1448
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3124472249614577
          cur_lr: 5.000000000000001e-05
          entropy: 1.172153123219808
          entropy_coeff: 0.009999999999999998
          kl: 0.014462620768565495
          policy_loss: -0.013992080589135487
          total_loss: -0.013010065588686202
          vf_explained_var: 0.4750766456127167
          vf_loss: 0.008184740309500033
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 50

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,506,14568.3,506000,-2.7669,-2.15,-5.65,276.69


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-10-21_23-10-30
  done: false
  episode_len_mean: 278.0
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7799999999999843
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1452
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3124472249614577
          cur_lr: 5.000000000000001e-05
          entropy: 1.1656923479504055
          entropy_coeff: 0.009999999999999998
          kl: 0.01010942759916694
          policy_loss: -0.00040735362304581535
          total_loss: 0.002511421259906557
          vf_explained_var: 0.4940005838871002
          vf_loss: 0.011417035479098558
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,507,14599.2,507000,-2.78,-2.15,-5.65,278


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-10-21_23-11-07
  done: false
  episode_len_mean: 278.0
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7799999999999843
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1456
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3124472249614577
          cur_lr: 5.000000000000001e-05
          entropy: 0.7912040233612061
          entropy_coeff: 0.009999999999999998
          kl: 0.0035290157091409924
          policy_loss: -0.009953533940845066
          total_loss: -0.006403201094104184
          vf_explained_var: 0.5167177319526672
          vf_loss: 0.010359741374850273
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,508,14636.3,508000,-2.78,-2.15,-5.65,278


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-10-21_23-11-43
  done: false
  episode_len_mean: 277.64
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7763999999999838
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1459
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15622361248072886
          cur_lr: 5.000000000000001e-05
          entropy: 0.6888232833809322
          entropy_coeff: 0.009999999999999998
          kl: 0.00846089830981341
          policy_loss: -0.060130093081129925
          total_loss: -0.0563814918200175
          vf_explained_var: 0.6219021081924438
          vf_loss: 0.009315040816242496
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 50

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,509,14672.2,509000,-2.7764,-2.15,-5.65,277.64


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-10-21_23-12-20
  done: false
  episode_len_mean: 277.63
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7762999999999844
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1463
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15622361248072886
          cur_lr: 5.000000000000001e-05
          entropy: 0.7364351507690218
          entropy_coeff: 0.009999999999999998
          kl: 0.007134562406944016
          policy_loss: 0.023061188931266467
          total_loss: 0.028032465361886555
          vf_explained_var: 0.5104951858520508
          vf_loss: 0.01122104318605529
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,510,14709.3,510000,-2.7763,-2.15,-5.65,277.63


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-10-21_23-12-55
  done: false
  episode_len_mean: 278.7
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7869999999999844
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1466
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15622361248072886
          cur_lr: 5.000000000000001e-05
          entropy: 1.0940993713008034
          entropy_coeff: 0.009999999999999998
          kl: 0.038057610246557454
          policy_loss: -0.11392093383603626
          total_loss: -0.10968768778774474
          vf_explained_var: 0.6635743379592896
          vf_loss: 0.00922874306432075
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,511,14744.4,511000,-2.787,-2.15,-5.65,278.7




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-10-21_23-13-50
  done: false
  episode_len_mean: 277.78
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.777799999999985
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1470
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 0.6772908535268571
          entropy_coeff: 0.009999999999999998
          kl: 0.005658383271149554
          policy_loss: -0.03816055585112837
          total_loss: -0.03310243975785043
          vf_explained_var: 0.5584376454353333
          vf_loss: 0.010505062714219093
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,512,14799.3,512000,-2.7778,-2.15,-5.65,277.78


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-10-21_23-14-24
  done: false
  episode_len_mean: 277.79
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7778999999999843
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1474
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 0.8476710067854987
          entropy_coeff: 0.009999999999999998
          kl: 0.01003436193020022
          policy_loss: -0.005915932854016622
          total_loss: -0.001006193541818195
          vf_explained_var: 0.532079815864563
          vf_loss: 0.011035044491291047
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,513,14833,513000,-2.7779,-2.15,-5.65,277.79


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-10-21_23-14-59
  done: false
  episode_len_mean: 274.39
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.743899999999986
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1478
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 0.8734263936678569
          entropy_coeff: 0.009999999999999998
          kl: 0.006962780174244168
          policy_loss: -0.0012861428161462147
          total_loss: 0.001195356912083096
          vf_explained_var: 0.547210156917572
          vf_loss: 0.009584138850267562
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,514,14868.9,514000,-2.7439,-2.15,-5.65,274.39


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-10-21_23-15-37
  done: false
  episode_len_mean: 275.04
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7503999999999853
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1481
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 0.6946769376595815
          entropy_coeff: 0.009999999999999998
          kl: 0.004971676985621186
          policy_loss: -0.10925626763039165
          total_loss: -0.10320887996090783
          vf_explained_var: 0.456130713224411
          vf_loss: 0.01182911769590444
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,515,14906.5,515000,-2.7504,-2.15,-5.65,275.04


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-10-21_23-16-14
  done: false
  episode_len_mean: 274.49
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7448999999999852
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1485
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11716770936054664
          cur_lr: 5.000000000000001e-05
          entropy: 0.6228111180994246
          entropy_coeff: 0.009999999999999998
          kl: 0.006109880913447914
          policy_loss: 0.002636559804280599
          total_loss: 0.008267021675904592
          vf_explained_var: 0.4115751385688782
          vf_loss: 0.011142695488201248
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,516,14943.6,516000,-2.7449,-2.15,-5.65,274.49


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-10-21_23-16-50
  done: false
  episode_len_mean: 274.8
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7479999999999847
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1489
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11716770936054664
          cur_lr: 5.000000000000001e-05
          entropy: 0.6614859388934241
          entropy_coeff: 0.009999999999999998
          kl: 0.005369788246058777
          policy_loss: 0.02038173195388582
          total_loss: 0.026164884782499738
          vf_explained_var: 0.43124091625213623
          vf_loss: 0.011768845830940539
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,517,14979.8,517000,-2.748,-2.15,-5.65,274.8


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-10-21_23-17-26
  done: false
  episode_len_mean: 276.05
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7604999999999844
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1493
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11716770936054664
          cur_lr: 5.000000000000001e-05
          entropy: 1.088359420829349
          entropy_coeff: 0.009999999999999998
          kl: 0.05130250024370724
          policy_loss: -0.032376889636119205
          total_loss: -0.027891238033771516
          vf_explained_var: 0.5192078351974487
          vf_loss: 0.009358248770392189
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,518,15014.9,518000,-2.7605,-2.15,-5.65,276.05


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-10-21_23-18-02
  done: false
  episode_len_mean: 276.11
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7610999999999852
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1496
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17575156404082
          cur_lr: 5.000000000000001e-05
          entropy: 0.632233691877789
          entropy_coeff: 0.009999999999999998
          kl: 0.005204542822727139
          policy_loss: -0.1020135785970423
          total_loss: -0.09549595225188467
          vf_explained_var: 0.3758816719055176
          vf_loss: 0.011925259004864427
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,519,15051.6,519000,-2.7611,-2.15,-5.65,276.11




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-10-21_23-18-54
  done: false
  episode_len_mean: 275.89
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.758899999999985
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1500
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17575156404082
          cur_lr: 5.000000000000001e-05
          entropy: 0.6367360101805792
          entropy_coeff: 0.009999999999999998
          kl: 0.004903416560193148
          policy_loss: -0.003511941846874025
          total_loss: 0.00245940958460172
          vf_explained_var: 0.41845595836639404
          vf_loss: 0.011476930768953428
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 5200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,520,15103.5,520000,-2.7589,-2.15,-5.65,275.89


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-10-21_23-19-28
  done: false
  episode_len_mean: 276.2
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.7619999999999845
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1503
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08787578202041
          cur_lr: 5.000000000000001e-05
          entropy: 0.8958372539944119
          entropy_coeff: 0.009999999999999998
          kl: 0.010586114582975368
          policy_loss: -0.18117373858888944
          total_loss: -0.17848145680295097
          vf_explained_var: 0.49958181381225586
          vf_loss: 0.010720395514120658
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 5210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,521,15137.7,521000,-2.762,-2.15,-5.65,276.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-10-21_23-20-03
  done: false
  episode_len_mean: 278.24
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.782399999999984
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1507
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08787578202041
          cur_lr: 5.000000000000001e-05
          entropy: 0.7725296715895335
          entropy_coeff: 0.009999999999999998
          kl: 0.01157385186715747
          policy_loss: 0.025999985966417526
          total_loss: 0.02797273016638226
          vf_explained_var: 0.5909097194671631
          vf_loss: 0.008680981859086185
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,522,15172.1,522000,-2.7824,-2.15,-5.65,278.24


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-10-21_23-20-38
  done: false
  episode_len_mean: 279.73
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.7972999999999844
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1511
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08787578202041
          cur_lr: 5.000000000000001e-05
          entropy: 0.8829357034630245
          entropy_coeff: 0.009999999999999998
          kl: 0.016135534453972776
          policy_loss: -0.012264960259199143
          total_loss: -0.007960578261150254
          vf_explained_var: 0.47752419114112854
          vf_loss: 0.011715815888924732
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,523,15207.6,523000,-2.7973,-2.3,-5.65,279.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-10-21_23-21-12
  done: false
  episode_len_mean: 280.27
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.8026999999999846
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1514
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08787578202041
          cur_lr: 5.000000000000001e-05
          entropy: 0.9758203791247474
          entropy_coeff: 0.009999999999999998
          kl: 0.03432603155561965
          policy_loss: -0.0717001839644379
          total_loss: -0.07006839364767074
          vf_explained_var: 0.5829194188117981
          vf_loss: 0.00837356524651922
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,524,15241.1,524000,-2.8027,-2.3,-5.65,280.27


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-10-21_23-21-46
  done: false
  episode_len_mean: 281.22
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.812199999999984
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 3
  episodes_total: 1517
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13181367303061498
          cur_lr: 5.000000000000001e-05
          entropy: 0.7245148933596082
          entropy_coeff: 0.009999999999999998
          kl: 0.0084175545171512
          policy_loss: -0.1681638992495007
          total_loss: -0.16219142907195622
          vf_explained_var: 0.4731777310371399
          vf_loss: 0.012108071272571881
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 52500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,525,15275.6,525000,-2.8122,-2.3,-5.65,281.22


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-10-21_23-22-22
  done: false
  episode_len_mean: 280.97
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.8096999999999834
  episode_reward_min: -5.649999999999924
  episodes_this_iter: 4
  episodes_total: 1521
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13181367303061498
          cur_lr: 5.000000000000001e-05
          entropy: 0.6397123826874627
          entropy_coeff: 0.009999999999999998
          kl: 0.006298111053176707
          policy_loss: -0.008874957511822382
          total_loss: -0.005911606053511302
          vf_explained_var: 0.5720086097717285
          vf_loss: 0.008530295663513243
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,526,15311.4,526000,-2.8097,-2.3,-5.65,280.97


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-10-21_23-22-56
  done: false
  episode_len_mean: 278.86
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.788599999999984
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1525
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13181367303061498
          cur_lr: 5.000000000000001e-05
          entropy: 0.8736242334047953
          entropy_coeff: 0.009999999999999998
          kl: 0.01296757342701075
          policy_loss: 0.011549840950303607
          total_loss: 0.01696177042192883
          vf_explained_var: 0.4486713409423828
          vf_loss: 0.012438867561933067
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 5270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,527,15344.8,527000,-2.7886,-2.3,-4.23,278.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-10-21_23-23-31
  done: false
  episode_len_mean: 278.78
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.7877999999999847
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1529
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13181367303061498
          cur_lr: 5.000000000000001e-05
          entropy: 0.7247985184192658
          entropy_coeff: 0.009999999999999998
          kl: 0.0057627181774790965
          policy_loss: 0.021342643019225862
          total_loss: 0.025704831961128445
          vf_explained_var: 0.5127815008163452
          vf_loss: 0.010850569212602244
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,528,15380.6,528000,-2.7878,-2.3,-4.23,278.78




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-10-21_23-24-19
  done: false
  episode_len_mean: 278.57
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.785699999999984
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 3
  episodes_total: 1532
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13181367303061498
          cur_lr: 5.000000000000001e-05
          entropy: 1.428917889462577
          entropy_coeff: 0.009999999999999998
          kl: 0.027995499812700542
          policy_loss: 0.0534080032673147
          total_loss: 0.04808088400297695
          vf_explained_var: 0.7692311406135559
          vf_loss: 0.005271867638738412
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 52900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,529,15428.5,529000,-2.7857,-2.22,-4.23,278.57


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-10-21_23-24-57
  done: false
  episode_len_mean: 277.84
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7783999999999853
  episode_reward_min: -4.229999999999954
  episodes_this_iter: 4
  episodes_total: 1536
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19772050954592252
          cur_lr: 5.000000000000001e-05
          entropy: 0.684778071112103
          entropy_coeff: 0.009999999999999998
          kl: 0.00695691713533365
          policy_loss: -0.001236222187678019
          total_loss: 0.00544845602578587
          vf_explained_var: 0.43504130840301514
          vf_loss: 0.012156932490567367
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,530,15466.2,530000,-2.7784,-2.22,-4.23,277.84


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-10-21_23-25-27
  done: false
  episode_len_mean: 278.84
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7883999999999842
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 2
  episodes_total: 1538
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19772050954592252
          cur_lr: 5.000000000000001e-05
          entropy: 1.560646798213323
          entropy_coeff: 0.009999999999999998
          kl: 0.09134241560631122
          policy_loss: -0.09037613231274817
          total_loss: -0.08106098601387607
          vf_explained_var: 0.8246917724609375
          vf_loss: 0.006861344340076256
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,531,15495.8,531000,-2.7884,-2.22,-4.65,278.84


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-10-21_23-25-59
  done: false
  episode_len_mean: 278.47
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.784699999999984
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 1542
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29658076431888375
          cur_lr: 5.000000000000001e-05
          entropy: 0.9550244477060106
          entropy_coeff: 0.009999999999999998
          kl: 0.02115438113337665
          policy_loss: -0.06500604243742095
          total_loss: -0.06051959755520026
          vf_explained_var: 0.7530812621116638
          vf_loss: 0.007762706980833577
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,532,15527.7,532000,-2.7847,-2.22,-4.65,278.47


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-10-21_23-26-23
  done: false
  episode_len_mean: 282.41
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8240999999999827
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 2
  episodes_total: 1544
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4448711464783256
          cur_lr: 5.000000000000001e-05
          entropy: 1.9085237913661532
          entropy_coeff: 0.009999999999999998
          kl: 0.024660101506524906
          policy_loss: 0.05289585217833519
          total_loss: 0.047922846116125586
          vf_explained_var: 0.45990535616874695
          vf_loss: 0.0031416617534382064
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,533,15552.1,533000,-2.8241,-2.22,-5.22,282.41


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-10-21_23-26-56
  done: false
  episode_len_mean: 281.51
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.815099999999984
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 4
  episodes_total: 1548
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6673067197174883
          cur_lr: 5.000000000000001e-05
          entropy: 0.8944945606920455
          entropy_coeff: 0.009999999999999998
          kl: 0.005415481968285086
          policy_loss: -0.07037271613048182
          total_loss: -0.06561588305566046
          vf_explained_var: 0.5445055961608887
          vf_loss: 0.010087992100872927
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,534,15585.5,534000,-2.8151,-2.22,-5.22,281.51


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-10-21_23-27-32
  done: false
  episode_len_mean: 280.63
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8062999999999843
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 4
  episodes_total: 1552
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6673067197174883
          cur_lr: 5.000000000000001e-05
          entropy: 0.6154280728764004
          entropy_coeff: 0.009999999999999998
          kl: 0.0017261347116972717
          policy_loss: -0.011831928458478715
          total_loss: -0.005200755844513575
          vf_explained_var: 0.3984777331352234
          vf_loss: 0.011633593796028031
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,535,15621,535000,-2.8063,-2.22,-5.22,280.63


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-10-21_23-28-03
  done: false
  episode_len_mean: 281.67
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8166999999999844
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 1555
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33365335985874417
          cur_lr: 5.000000000000001e-05
          entropy: 0.6833131922615899
          entropy_coeff: 0.009999999999999998
          kl: 0.014191004185207825
          policy_loss: 0.03858140764964951
          total_loss: 0.044719645712110734
          vf_explained_var: 0.5205349922180176
          vf_loss: 0.00823649807393344
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,536,15652.5,536000,-2.8167,-2.22,-5.22,281.67


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-10-21_23-28-38
  done: false
  episode_len_mean: 283.08
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.830799999999984
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 1558
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33365335985874417
          cur_lr: 5.000000000000001e-05
          entropy: 0.9462871220376756
          entropy_coeff: 0.009999999999999998
          kl: 0.009912813227136831
          policy_loss: 0.03171752393245697
          total_loss: 0.034423879202869205
          vf_explained_var: 0.45893394947052
          vf_loss: 0.008861781228592411
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 5370

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,537,15686.7,537000,-2.8308,-2.22,-5.22,283.08




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-10-21_23-29-32
  done: false
  episode_len_mean: 282.89
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.828899999999984
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 4
  episodes_total: 1562
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33365335985874417
          cur_lr: 5.000000000000001e-05
          entropy: 0.8177714354462093
          entropy_coeff: 0.009999999999999998
          kl: 0.006717143542026547
          policy_loss: 0.0208448626101017
          total_loss: 0.026199301415019565
          vf_explained_var: 0.417985200881958
          vf_loss: 0.011290956686975228
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 5380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,538,15741,538000,-2.8289,-2.22,-5.22,282.89


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-10-21_23-29-58
  done: false
  episode_len_mean: 284.31
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8428999999999838
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 2
  episodes_total: 1564
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33365335985874417
          cur_lr: 5.000000000000001e-05
          entropy: 1.4699549674987793
          entropy_coeff: 0.009999999999999998
          kl: 0.02824286259086727
          policy_loss: -0.10913688888152441
          total_loss: -0.09959208799733056
          vf_explained_var: 0.4940943121910095
          vf_loss: 0.01482102370613979
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,539,15766.8,539000,-2.8429,-2.22,-5.22,284.31


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-10-21_23-30-23
  done: false
  episode_len_mean: 288.28
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8825999999999823
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 1567
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5004800397881163
          cur_lr: 5.000000000000001e-05
          entropy: 1.5042992883258395
          entropy_coeff: 0.009999999999999998
          kl: 0.01415713043942855
          policy_loss: 0.01968182838625378
          total_loss: 0.018569143116474153
          vf_explained_var: -0.048288021236658096
          vf_loss: 0.0068449485503758
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 54

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,540,15792.2,540000,-2.8826,-2.22,-5.22,288.28


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-10-21_23-30-54
  done: false
  episode_len_mean: 289.77
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8974999999999818
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 1570
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5004800397881163
          cur_lr: 5.000000000000001e-05
          entropy: 1.2549593839380475
          entropy_coeff: 0.009999999999999998
          kl: 0.01218555512483748
          policy_loss: -0.12891447792450586
          total_loss: -0.12469801993833649
          vf_explained_var: 0.3665786683559418
          vf_loss: 0.01066741987855898
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,541,15822.9,541000,-2.8975,-2.22,-5.22,289.77


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-10-21_23-31-24
  done: false
  episode_len_mean: 292.31
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9516999999999816
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 3
  episodes_total: 1573
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5004800397881163
          cur_lr: 5.000000000000001e-05
          entropy: 1.4860978543758392
          entropy_coeff: 0.009999999999999998
          kl: 0.027933239973840625
          policy_loss: -0.031089649515019524
          total_loss: 0.10066661373712123
          vf_explained_var: 0.27611881494522095
          vf_loss: 0.13263721331540082
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,542,15852.9,542000,-2.9517,-2.22,-6.99,292.31


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-10-21_23-31-55
  done: false
  episode_len_mean: 293.3
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.961399999999981
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 4
  episodes_total: 1577
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 0.6462839669651456
          entropy_coeff: 0.009999999999999998
          kl: 0.00893772916307106
          policy_loss: 0.038903765794303685
          total_loss: 0.06042686464885871
          vf_explained_var: 0.30151379108428955
          vf_loss: 0.02127620574707786
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 54300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,543,15883.8,543000,-2.9614,-2.22,-6.99,293.3


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-10-21_23-32-24
  done: false
  episode_len_mean: 295.41
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.982499999999981
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 2
  episodes_total: 1579
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 1.663170936372545
          entropy_coeff: 0.009999999999999998
          kl: 0.013461483847778845
          policy_loss: -0.10115492687457138
          total_loss: -0.09730842386682828
          vf_explained_var: 0.6289772391319275
          vf_loss: 0.010372403178674479
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,544,15913,544000,-2.9825,-2.22,-6.99,295.41


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-10-21_23-32-54
  done: false
  episode_len_mean: 295.27
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.981099999999981
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 3
  episodes_total: 1582
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 0.9151265439059999
          entropy_coeff: 0.009999999999999998
          kl: 0.007812778475964363
          policy_loss: -0.0768105393482579
          total_loss: -0.05939227690299352
          vf_explained_var: 0.257935494184494
          vf_loss: 0.020704319804079004
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 5450

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,545,15942.3,545000,-2.9811,-2.22,-6.99,295.27


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-10-21_23-33-21
  done: false
  episode_len_mean: 298.87
  episode_media: {}
  episode_reward_max: -1.599999999999999
  episode_reward_mean: -2.99629999999998
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 3
  episodes_total: 1585
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 1.4383900066216786
          entropy_coeff: 0.009999999999999998
          kl: 0.0124131984002811
          policy_loss: -0.08423556192881532
          total_loss: -0.047861592678560154
          vf_explained_var: 0.5942037105560303
          vf_loss: 0.041439028694811794
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 54600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,546,15969.6,546000,-2.9963,-1.6,-6.99,298.87


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-10-21_23-33-50
  done: false
  episode_len_mean: 302.59
  episode_media: {}
  episode_reward_max: -1.599999999999999
  episode_reward_mean: -3.0334999999999797
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 3
  episodes_total: 1588
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 1.513462746805615
          entropy_coeff: 0.009999999999999998
          kl: 0.010120088841495326
          policy_loss: 0.09376635367257727
          total_loss: 0.0958228278077311
          vf_explained_var: 0.7081937789916992
          vf_loss: 0.009593747670037879
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,547,15998.7,547000,-3.0335,-1.6,-6.99,302.59




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-10-21_23-34-38
  done: false
  episode_len_mean: 304.59
  episode_media: {}
  episode_reward_max: -1.599999999999999
  episode_reward_mean: -3.0534999999999797
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 3
  episodes_total: 1591
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 1.3619101173347896
          entropy_coeff: 0.009999999999999998
          kl: 0.010154122209900151
          policy_loss: 0.011690600050820244
          total_loss: 0.020275824848148556
          vf_explained_var: 0.23529425263404846
          vf_loss: 0.014581422660396332
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,548,16046.6,548000,-3.0535,-1.6,-6.99,304.59


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-10-21_23-35-05
  done: false
  episode_len_mean: 307.2
  episode_media: {}
  episode_reward_max: -1.599999999999999
  episode_reward_mean: -3.05839999999998
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 3
  episodes_total: 1594
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 1.8735261638959249
          entropy_coeff: 0.009999999999999998
          kl: 0.012270253152601789
          policy_loss: -0.060635235119197106
          total_loss: -0.022225452918145393
          vf_explained_var: 0.5708224177360535
          vf_loss: 0.04793352171416498
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 5490

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,549,16073.7,549000,-3.0584,-1.6,-6.99,307.2


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-10-21_23-35-37
  done: false
  episode_len_mean: 308.62
  episode_media: {}
  episode_reward_max: -1.0399999999999963
  episode_reward_mean: -3.041699999999979
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 3
  episodes_total: 1597
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 0.9004690647125244
          entropy_coeff: 0.009999999999999998
          kl: 0.016463852621530547
          policy_loss: -0.023141060686773723
          total_loss: 0.03849399354722765
          vf_explained_var: 0.346019983291626
          vf_loss: 0.05827999864187505
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 5500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,550,16105.6,550000,-3.0417,-1.04,-6.99,308.62


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-10-21_23-36-08
  done: false
  episode_len_mean: 309.14
  episode_media: {}
  episode_reward_max: -1.0399999999999963
  episode_reward_mean: -3.0468999999999795
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 3
  episodes_total: 1600
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 0.6709163361125522
          entropy_coeff: 0.009999999999999998
          kl: 0.007804205610817484
          policy_loss: -0.10824929045306311
          total_loss: -0.08290777073966132
          vf_explained_var: 0.4428415894508362
          vf_loss: 0.02619190605150329
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,551,16137.1,551000,-3.0469,-1.04,-6.99,309.14


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-10-21_23-36-34
  done: false
  episode_len_mean: 312.35
  episode_media: {}
  episode_reward_max: -1.0399999999999963
  episode_reward_mean: -3.0875999999999784
  episode_reward_min: -6.989999999999951
  episodes_this_iter: 2
  episodes_total: 1602
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 1.8862085037761265
          entropy_coeff: 0.009999999999999998
          kl: 0.019239310594780137
          policy_loss: -0.09797456603911188
          total_loss: 0.0031062051653862
          vf_explained_var: 0.6928490400314331
          vf_loss: 0.10549951808320152
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 5520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,552,16162.5,552000,-3.0876,-1.04,-6.99,312.35


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-10-21_23-37-00
  done: false
  episode_len_mean: 315.5
  episode_media: {}
  episode_reward_max: -1.0399999999999963
  episode_reward_mean: -3.146899999999978
  episode_reward_min: -9.529999999999939
  episodes_this_iter: 3
  episodes_total: 1605
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7507200596821744
          cur_lr: 5.000000000000001e-05
          entropy: 1.9572295930650498
          entropy_coeff: 0.009999999999999998
          kl: 0.023203547312046702
          policy_loss: -0.10621733110811975
          total_loss: 0.1144340183171961
          vf_explained_var: 0.6911612749099731
          vf_loss: 0.22280428053604232
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,553,16188.8,553000,-3.1469,-1.04,-9.53,315.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-10-21_23-37-28
  done: false
  episode_len_mean: 318.92
  episode_media: {}
  episode_reward_max: -1.0399999999999963
  episode_reward_mean: -3.194099999999977
  episode_reward_min: -9.529999999999939
  episodes_this_iter: 3
  episodes_total: 1608
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.4953568597634634
          entropy_coeff: 0.009999999999999998
          kl: 0.013608468279316494
          policy_loss: -0.047956320726209214
          total_loss: 0.023835057102971607
          vf_explained_var: 0.7366610169410706
          vf_loss: 0.07142072301875386
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,554,16216.2,554000,-3.1941,-1.04,-9.53,318.92


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-10-21_23-37-54
  done: false
  episode_len_mean: 320.56
  episode_media: {}
  episode_reward_max: -0.309999999999976
  episode_reward_mean: -3.169499999999977
  episode_reward_min: -9.529999999999939
  episodes_this_iter: 2
  episodes_total: 1610
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.5511732319990794
          entropy_coeff: 0.009999999999999998
          kl: 0.012199775280642743
          policy_loss: -0.13527706944280202
          total_loss: -0.05993957105610106
          vf_explained_var: 0.7926420569419861
          vf_loss: 0.07711130550338162
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 5550

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,555,16242.7,555000,-3.1695,-0.31,-9.53,320.56


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-10-21_23-38-20
  done: false
  episode_len_mean: 324.28
  episode_media: {}
  episode_reward_max: -0.309999999999976
  episode_reward_mean: -3.2492999999999754
  episode_reward_min: -9.529999999999939
  episodes_this_iter: 3
  episodes_total: 1613
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.887889019648234
          entropy_coeff: 0.009999999999999998
          kl: 0.015113109797614656
          policy_loss: 0.05473861574298806
          total_loss: 0.14175312783983018
          vf_explained_var: 0.7054204344749451
          vf_loss: 0.08887483032627239
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 556000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,556,16268.5,556000,-3.2493,-0.31,-9.53,324.28


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-10-21_23-38-49
  done: false
  episode_len_mean: 326.47
  episode_media: {}
  episode_reward_max: -0.309999999999976
  episode_reward_mean: -3.296599999999975
  episode_reward_min: -9.529999999999939
  episodes_this_iter: 2
  episodes_total: 1615
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 2.006280071205563
          entropy_coeff: 0.009999999999999998
          kl: 0.012782124266685907
          policy_loss: 0.021048004925251006
          total_loss: 0.10014936584565375
          vf_explained_var: 0.7351067662239075
          vf_loss: 0.08477046363469627
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,557,16297,557000,-3.2966,-0.31,-9.53,326.47


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-10-21_23-39-18
  done: false
  episode_len_mean: 327.54
  episode_media: {}
  episode_reward_max: -0.309999999999976
  episode_reward_mean: -3.3689999999999753
  episode_reward_min: -11.179999999999948
  episodes_this_iter: 3
  episodes_total: 1618
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.3154184407658047
          entropy_coeff: 0.009999999999999998
          kl: 0.014008043427129262
          policy_loss: -0.0289624460041523
          total_loss: 0.15860875133011076
          vf_explained_var: 0.7335397601127625
          vf_loss: 0.1849512021160788
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 55800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,558,16326.2,558000,-3.369,-0.31,-11.18,327.54




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-10-21_23-40-01
  done: false
  episode_len_mean: 330.73
  episode_media: {}
  episode_reward_max: -0.309999999999976
  episode_reward_mean: -3.4341999999999753
  episode_reward_min: -11.179999999999948
  episodes_this_iter: 3
  episodes_total: 1621
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.9624329182836744
          entropy_coeff: 0.009999999999999998
          kl: 0.013368069980450798
          policy_loss: 0.009956189658906725
          total_loss: 0.17539866003725263
          vf_explained_var: 0.7938399314880371
          vf_loss: 0.17001327959199747
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 559

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,559,16369.8,559000,-3.4342,-0.31,-11.18,330.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-10-21_23-40-24
  done: false
  episode_len_mean: 334.09
  episode_media: {}
  episode_reward_max: -0.309999999999976
  episode_reward_mean: -3.5191999999999735
  episode_reward_min: -11.179999999999948
  episodes_this_iter: 2
  episodes_total: 1623
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.87421272860633
          entropy_coeff: 0.009999999999999998
          kl: 0.01526945592284474
          policy_loss: -0.0776471205883556
          total_loss: 0.05235169115993712
          vf_explained_var: 0.8353196978569031
          vf_loss: 0.13154631232221922
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 560000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,560,16392.1,560000,-3.5192,-0.31,-11.18,334.09


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-10-21_23-40-44
  done: false
  episode_len_mean: 338.21
  episode_media: {}
  episode_reward_max: -0.309999999999976
  episode_reward_mean: -3.5560999999999723
  episode_reward_min: -11.179999999999948
  episodes_this_iter: 2
  episodes_total: 1625
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.8349717418352762
          entropy_coeff: 0.009999999999999998
          kl: 0.013174688686214846
          policy_loss: -0.04596393174595303
          total_loss: 0.08280051781071557
          vf_explained_var: 0.7797275185585022
          vf_loss: 0.13227841462939977
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 561

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,561,16412.8,561000,-3.5561,-0.31,-11.18,338.21


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-10-21_23-41-09
  done: false
  episode_len_mean: 342.72
  episode_media: {}
  episode_reward_max: -0.09999999999996788
  episode_reward_mean: -3.553599999999972
  episode_reward_min: -11.179999999999948
  episodes_this_iter: 3
  episodes_total: 1628
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.9698124064339533
          entropy_coeff: 0.009999999999999998
          kl: 0.011816230076917596
          policy_loss: -0.08069136713941892
          total_loss: 0.0901381069380376
          vf_explained_var: 0.6410759687423706
          vf_loss: 0.17722158146401246
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,562,16437.3,562000,-3.5536,-0.1,-11.18,342.72


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-10-21_23-41-31
  done: false
  episode_len_mean: 346.32
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.4955999999999716
  episode_reward_min: -11.179999999999948
  episodes_this_iter: 2
  episodes_total: 1630
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 2.1353041251500446
          entropy_coeff: 0.009999999999999998
          kl: 0.016149841985671286
          policy_loss: -0.11085587566097578
          total_loss: 0.01317141064339214
          vf_explained_var: 0.8525574803352356
          vf_loss: 0.12719431097308795
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,563,16459.6,563000,-3.4956,2.42,-11.18,346.32


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-10-21_23-41-54
  done: false
  episode_len_mean: 348.94
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.5973999999999706
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 2
  episodes_total: 1632
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 2.131290758980645
          entropy_coeff: 0.009999999999999998
          kl: 0.013266491485181402
          policy_loss: -0.09510460876756244
          total_loss: 0.03487378723091549
          vf_explained_var: 0.6713628172874451
          vf_loss: 0.13635217249393464
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 5640

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,564,16482,564000,-3.5974,2.42,-12.41,348.94


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-10-21_23-42-20
  done: false
  episode_len_mean: 352.87
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.6061999999999705
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1635
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.9728207005394829
          entropy_coeff: 0.009999999999999998
          kl: 0.013357377353701436
          policy_loss: -0.11610116387406985
          total_loss: -0.05433337630497085
          vf_explained_var: 0.9473462700843811
          vf_loss: 0.06645451444718573
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 56

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,565,16508.3,565000,-3.6062,2.42,-12.41,352.87


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-10-21_23-42-43
  done: false
  episode_len_mean: 356.29
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.573299999999971
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 2
  episodes_total: 1637
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.965590046511756
          entropy_coeff: 0.009999999999999998
          kl: 0.012406644708590915
          policy_loss: -0.006911846498648326
          total_loss: 0.07225843361682362
          vf_explained_var: 0.8965105414390564
          vf_loss: 0.0848553016781807
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 56600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,566,16531.6,566000,-3.5733,2.42,-12.41,356.29


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-10-21_23-43-08
  done: false
  episode_len_mean: 357.02
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.51639999999997
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 2
  episodes_total: 1639
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.948891912566291
          entropy_coeff: 0.009999999999999998
          kl: 0.010814454661701294
          policy_loss: -0.16645747961269486
          total_loss: -0.10275764142473538
          vf_explained_var: 0.8864635825157166
          vf_loss: 0.07101081602482331
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 56700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,567,16556.1,567000,-3.5164,2.42,-12.41,357.02


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-10-21_23-43-32
  done: false
  episode_len_mean: 360.21
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.53279999999997
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 2
  episodes_total: 1641
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.8794397777981229
          entropy_coeff: 0.009999999999999998
          kl: 0.015539301429891333
          policy_loss: -0.09869468907515208
          total_loss: -0.015540089789364072
          vf_explained_var: 0.8477363586425781
          vf_loss: 0.08445049832678503
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 568

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,568,16580.5,568000,-3.5328,2.42,-12.41,360.21


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-10-21_23-44-01
  done: false
  episode_len_mean: 358.89
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.4761999999999706
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1644
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.40601523982154
          entropy_coeff: 0.009999999999999998
          kl: 0.012367712990936102
          policy_loss: -0.06236137329704232
          total_loss: 0.05689941292835606
          vf_explained_var: 0.920965313911438
          vf_loss: 0.11939390634910928
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,569,16608.8,569000,-3.4762,2.42,-12.41,358.89


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-10-21_23-44-26
  done: false
  episode_len_mean: 363.56
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.4597999999999702
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1647
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.9209901849428812
          entropy_coeff: 0.009999999999999998
          kl: 0.014939529396114122
          policy_loss: 0.007129642739892006
          total_loss: 0.21113756553580362
          vf_explained_var: 0.9171961545944214
          vf_loss: 0.2063947202430831
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 5700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,570,16634.5,570000,-3.4598,2.42,-12.41,363.56


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-10-21_23-44-53
  done: false
  episode_len_mean: 365.41
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.46359999999997
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 2
  episodes_total: 1649
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.973683269818624
          entropy_coeff: 0.009999999999999998
          kl: 0.01240626912580096
          policy_loss: -0.1541039394835631
          total_loss: -0.0653945172826449
          vf_explained_var: 0.921789824962616
          vf_loss: 0.09447580441418621
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,571,16661,571000,-3.4636,2.42,-12.41,365.41




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-10-21_23-45-36
  done: false
  episode_len_mean: 369.29
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.5015999999999696
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1652
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.9602665225664775
          entropy_coeff: 0.009999999999999998
          kl: 0.016158677558706338
          policy_loss: -0.10555156231340435
          total_loss: 0.0567410534247756
          vf_explained_var: 0.8650498390197754
          vf_loss: 0.16369931374986965
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 5720

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,572,16704.5,572000,-3.5016,2.42,-12.41,369.29


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-10-21_23-46-00
  done: false
  episode_len_mean: 371.78
  episode_media: {}
  episode_reward_max: 2.4200000000000816
  episode_reward_mean: -3.6031999999999687
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 2
  episodes_total: 1654
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 2.058903691503737
          entropy_coeff: 0.009999999999999998
          kl: 0.012385860426667743
          policy_loss: -0.06715631203518974
          total_loss: 0.01387577603260676
          vf_explained_var: 0.8215344548225403
          vf_loss: 0.08767365552484989
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 5730

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,573,16728.5,573000,-3.6032,2.42,-12.41,371.78




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-10-21_23-46-40
  done: false
  episode_len_mean: 372.71
  episode_media: {}
  episode_reward_max: 9.65
  episode_reward_mean: -3.4587999999999695
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1657
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 2.0799255741967095
          entropy_coeff: 0.009999999999999998
          kl: 0.01837839768270779
          policy_loss: -0.0960179107884566
          total_loss: 0.34868683223095204
          vf_explained_var: 0.7647917866706848
          vf_loss: 0.44480845352841747
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 574000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,574,16767.9,574000,-3.4588,9.65,-12.41,372.71




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-10-21_23-47-37
  done: false
  episode_len_mean: 372.89
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.3458999999999683
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1660
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.9905449403656854
          entropy_coeff: 0.009999999999999998
          kl: 0.016643890716867886
          policy_loss: -0.11704133989082442
          total_loss: 0.3843447715457943
          vf_explained_var: 0.7928651571273804
          vf_loss: 0.5025492089490096
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 575000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,575,16825.2,575000,-3.3459,9.67,-12.41,372.89


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-10-21_23-48-00
  done: false
  episode_len_mean: 377.43
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.347799999999968
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 2
  episodes_total: 1662
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.9281410257021585
          entropy_coeff: 0.009999999999999998
          kl: 0.018871148904435527
          policy_loss: -0.021392945448557535
          total_loss: 0.23471744805574418
          vf_explained_var: 0.8410070538520813
          vf_loss: 0.2541413828316662
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 57600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,576,16847.6,576000,-3.3478,9.67,-12.41,377.43


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-10-21_23-48-28
  done: false
  episode_len_mean: 377.56
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.4302999999999684
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1665
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1260800895232614
          cur_lr: 5.000000000000001e-05
          entropy: 1.8973881522814433
          entropy_coeff: 0.009999999999999998
          kl: 0.023985590158510896
          policy_loss: -0.03145864920483695
          total_loss: 0.38087465597523584
          vf_explained_var: 0.8320890665054321
          vf_loss: 0.40429749745461674
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 5770

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,577,16875.9,577000,-3.4303,9.67,-12.41,377.56


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-10-21_23-48-58
  done: false
  episode_len_mean: 377.39
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.3766999999999694
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1668
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.8736491468217638
          entropy_coeff: 0.009999999999999998
          kl: 0.014402963603148874
          policy_loss: -0.056122853027449716
          total_loss: 0.32117916515303985
          vf_explained_var: 0.7873299717903137
          vf_loss: 0.3717101706398858
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 5780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,578,16905.6,578000,-3.3767,9.67,-12.41,377.39


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-10-21_23-49-25
  done: false
  episode_len_mean: 378.01
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.3982999999999697
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1671
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.7228122923109266
          entropy_coeff: 0.009999999999999998
          kl: 0.007627710984382071
          policy_loss: -0.06890480029914114
          total_loss: 0.16389551088213922
          vf_explained_var: 0.6470365524291992
          vf_loss: 0.23714431774699027
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 5790

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,579,16932.7,579000,-3.3983,9.67,-12.41,378.01


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-10-21_23-49-52
  done: false
  episode_len_mean: 378.91
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.3643999999999696
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 2
  episodes_total: 1673
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 2.05752368900511
          entropy_coeff: 0.009999999999999998
          kl: 0.007382282249748486
          policy_loss: -0.067207932472229
          total_loss: 0.01872993525531557
          vf_explained_var: 0.7131083011627197
          vf_loss: 0.09404354362438122
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 580000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,580,16960,580000,-3.3644,9.67,-12.41,378.91


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-10-21_23-50-17
  done: false
  episode_len_mean: 383.41
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.471399999999968
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1676
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 2.027350068092346
          entropy_coeff: 0.009999999999999998
          kl: 0.010588596024645862
          policy_loss: -0.056942169864972435
          total_loss: 0.14337007225387627
          vf_explained_var: 0.687515914440155
          vf_loss: 0.20270033306959603
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 581000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,581,16984.6,581000,-3.4714,9.67,-12.41,383.41


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-10-21_23-50-44
  done: false
  episode_len_mean: 383.04
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.4047999999999683
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 2
  episodes_total: 1678
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.888028085231781
          entropy_coeff: 0.009999999999999998
          kl: 0.009577300146264456
          policy_loss: -0.09759626388549805
          total_loss: 0.04425827090938886
          vf_explained_var: 0.7779462933540344
          vf_loss: 0.14455760152389605
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 58200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,582,17011.5,582000,-3.4048,9.67,-12.41,383.04


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-10-21_23-51-12
  done: false
  episode_len_mean: 386.69
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.5147999999999673
  episode_reward_min: -12.409999999999926
  episodes_this_iter: 3
  episodes_total: 1681
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.8140712353918287
          entropy_coeff: 0.009999999999999998
          kl: 0.013066908744927335
          policy_loss: 0.012988115598758062
          total_loss: 0.2854510011772315
          vf_explained_var: 0.8450190424919128
          vf_loss: 0.26853202448950875
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 58300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,583,17039.5,583000,-3.5148,9.67,-12.41,386.69




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-10-21_23-51-56
  done: false
  episode_len_mean: 383.37
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.485899999999969
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 3
  episodes_total: 1684
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.838239942656623
          entropy_coeff: 0.009999999999999998
          kl: 0.013727973308052753
          policy_loss: -0.06479488826460308
          total_loss: 0.3818844722997811
          vf_explained_var: 0.848931610584259
          vf_loss: 0.4418735542231136
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,584,17083.9,584000,-3.4859,9.67,-14.79,383.37


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-10-21_23-52-23
  done: false
  episode_len_mean: 385.24
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.408699999999969
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 3
  episodes_total: 1687
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6555459645059374
          entropy_coeff: 0.009999999999999998
          kl: 0.009298319569760514
          policy_loss: -0.21215047761797906
          total_loss: -0.06616126572092375
          vf_explained_var: 0.9059346318244934
          vf_loss: 0.1468386895954609
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 58500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,585,17111,585000,-3.4087,9.67,-14.79,385.24


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-10-21_23-52-51
  done: false
  episode_len_mean: 386.41
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.416799999999968
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 2
  episodes_total: 1689
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.82020200226042
          entropy_coeff: 0.009999999999999998
          kl: 0.007841552328590002
          policy_loss: -0.03608209060298072
          total_loss: 0.03490118324342701
          vf_explained_var: 0.8846591711044312
          vf_loss: 0.07593996957358387
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 586000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,586,17138.4,586000,-3.4168,9.67,-14.79,386.41


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-10-21_23-53-17
  done: false
  episode_len_mean: 385.73
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.313999999999968
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 3
  episodes_total: 1692
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6778541101349724
          entropy_coeff: 0.009999999999999998
          kl: 0.010822602210725308
          policy_loss: -0.06334996215171285
          total_loss: 0.033148831894828215
          vf_explained_var: 0.8844096064567566
          vf_loss: 0.09499665964394807
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 5870

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,587,17165,587000,-3.314,9.67,-14.79,385.73




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-10-21_23-54-37
  done: false
  episode_len_mean: 383.66
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.048199999999967
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 4
  episodes_total: 1696
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.642313876416948
          entropy_coeff: 0.009999999999999998
          kl: 0.006504098428556166
          policy_loss: 0.03508731596585777
          total_loss: 0.11054300557201108
          vf_explained_var: 0.9056372046470642
          vf_loss: 0.08089262530621555
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 588000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,588,17244.5,588000,-3.0482,9.67,-14.79,383.66


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-10-21_23-55-04
  done: false
  episode_len_mean: 384.79
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.105699999999967
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 2
  episodes_total: 1698
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.8806994888517592
          entropy_coeff: 0.009999999999999998
          kl: 0.012597262414588743
          policy_loss: -0.03693060808711582
          total_loss: 0.11961686097913318
          vf_explained_var: 0.8393374681472778
          vf_loss: 0.15407617112828625
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 58900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,589,17271.4,589000,-3.1057,9.67,-14.79,384.79


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-10-21_23-55-25
  done: false
  episode_len_mean: 387.94
  episode_media: {}
  episode_reward_max: 9.670000000000002
  episode_reward_mean: -3.052399999999967
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 3
  episodes_total: 1701
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.7018996781773037
          entropy_coeff: 0.009999999999999998
          kl: 0.006889194976957233
          policy_loss: -0.07015501877499951
          total_loss: 0.013856160009486808
          vf_explained_var: 0.8859594464302063
          vf_loss: 0.0893934979957218
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 59000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,590,17292.8,590000,-3.0524,9.67,-14.79,387.94




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-10-21_23-56-29
  done: false
  episode_len_mean: 384.47
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -2.8415999999999677
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 3
  episodes_total: 1704
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.7582982301712036
          entropy_coeff: 0.009999999999999998
          kl: 0.009972786801335884
          policy_loss: -0.058675410225987434
          total_loss: 0.275490844849911
          vf_explained_var: 0.7944864630699158
          vf_loss: 0.33490399834182527
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 591000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,591,17356.3,591000,-2.8416,9.83,-14.79,384.47




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-10-21_23-57-31
  done: false
  episode_len_mean: 383.39
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -2.657999999999969
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 3
  episodes_total: 1707
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6576776994599236
          entropy_coeff: 0.009999999999999998
          kl: 0.00826235905748912
          policy_loss: 0.02699641612254911
          total_loss: 0.13291273098438977
          vf_explained_var: 0.8639044165611267
          vf_loss: 0.10853697351283498
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 592000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,592,17418.6,592000,-2.658,9.83,-14.79,383.39




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-10-21_23-58-46
  done: false
  episode_len_mean: 376.73
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -2.548899999999969
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 4
  episodes_total: 1711
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.878729768594106
          entropy_coeff: 0.009999999999999998
          kl: 0.018173188031921604
          policy_loss: -0.08214920692973667
          total_loss: 0.34011484020286137
          vf_explained_var: 0.7831286787986755
          vf_loss: 0.41035464868570365
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 593000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,593,17494,593000,-2.5489,9.83,-14.79,376.73


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-10-21_23-59-11
  done: false
  episode_len_mean: 376.55
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -2.495899999999969
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 2
  episodes_total: 1713
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.907365526093377
          entropy_coeff: 0.009999999999999998
          kl: 0.012881259155823936
          policy_loss: -0.05761626466280884
          total_loss: 0.1782267494748036
          vf_explained_var: 0.8082355260848999
          vf_loss: 0.2331586761607064
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 594000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,594,17518.9,594000,-2.4959,9.83,-14.79,376.55


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-10-21_23-59-35
  done: false
  episode_len_mean: 378.44
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -2.43169999999997
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 3
  episodes_total: 1716
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6263914969232347
          entropy_coeff: 0.009999999999999998
          kl: 0.011837244831106552
          policy_loss: -0.043834859629472096
          total_loss: 0.170338209428721
          vf_explained_var: 0.8297490477561951
          vf_loss: 0.2104424587968323
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 595000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,595,17542.4,595000,-2.4317,9.83,-14.79,378.44




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-10-22_00-01-05
  done: false
  episode_len_mean: 370.07
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -1.8957999999999697
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 4
  episodes_total: 1720
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6871310353279114
          entropy_coeff: 0.009999999999999998
          kl: 0.013603514012834545
          policy_loss: -0.10062694748242697
          total_loss: 0.25633223172691133
          vf_explained_var: 0.8502057194709778
          vf_loss: 0.3508525199360318
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 596000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,596,17632.8,596000,-1.8958,9.83,-14.79,370.07




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-10-22_00-02-32
  done: false
  episode_len_mean: 361.38
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -1.5929999999999718
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 5
  episodes_total: 1725
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6330071793662178
          entropy_coeff: 0.009999999999999998
          kl: 0.012394970423439055
          policy_loss: -0.058536325270930924
          total_loss: 0.12773828899694814
          vf_explained_var: 0.8274555802345276
          vf_loss: 0.18166809228973257
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 597000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,597,17719.9,597000,-1.593,9.83,-14.79,361.38




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-10-22_00-03-30
  done: false
  episode_len_mean: 357.3
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -1.4657999999999711
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 3
  episodes_total: 1728
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.8078252620167203
          entropy_coeff: 0.009999999999999998
          kl: 0.009217179587327233
          policy_loss: -0.14701388875643412
          total_loss: -0.0346491997440656
          vf_explained_var: 0.9039649963378906
          vf_loss: 0.11487401566571659
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,598,17777.2,598000,-1.4658,9.83,-14.79,357.3




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-10-22_00-05-55
  done: false
  episode_len_mean: 336.84
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -0.8516999999999739
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 7
  episodes_total: 1735
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6192177401648626
          entropy_coeff: 0.009999999999999998
          kl: 0.012387708682837416
          policy_loss: -0.06987894264360268
          total_loss: 0.36095827755828697
          vf_explained_var: 0.9093260765075684
          vf_loss: 0.4261050649194254
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 599000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,599,17922.1,599000,-0.8517,9.83,-14.79,336.84




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-10-22_00-06-56
  done: false
  episode_len_mean: 328.84
  episode_media: {}
  episode_reward_max: 9.83
  episode_reward_mean: -0.6982999999999739
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 4
  episodes_total: 1739
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.708025089899699
          entropy_coeff: 0.009999999999999998
          kl: 0.011994530628277131
          policy_loss: -0.057850226304597324
          total_loss: 0.31973068041519986
          vf_explained_var: 0.9148386716842651
          vf_loss: 0.3744009560983007
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,600,17983.4,600000,-0.6983,9.83,-14.79,328.84




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-10-22_00-08-38
  done: false
  episode_len_mean: 318.5
  episode_media: {}
  episode_reward_max: 9.850000000000001
  episode_reward_mean: -0.31459999999997557
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 6
  episodes_total: 1745
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.7969722045792473
          entropy_coeff: 0.009999999999999998
          kl: 0.009562457686714895
          policy_loss: -0.1216080923875173
          total_loss: 0.37419660737117133
          vf_explained_var: 0.869198203086853
          vf_loss: 0.49762228346533244
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 601000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,601,18085.9,601000,-0.3146,9.85,-14.79,318.5


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-10-22_00-09-09
  done: false
  episode_len_mean: 316.18
  episode_media: {}
  episode_reward_max: 9.850000000000001
  episode_reward_mean: -0.24829999999997596
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 3
  episodes_total: 1748
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.7689760128657024
          entropy_coeff: 0.009999999999999998
          kl: 0.012266543850957035
          policy_loss: -0.1130177553743124
          total_loss: 0.10455727295743095
          vf_explained_var: 0.9231011867523193
          vf_loss: 0.21454512029886247
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 6020

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,602,18116.3,602000,-0.2483,9.85,-14.79,316.18


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-10-22_00-09-37
  done: false
  episode_len_mean: 316.86
  episode_media: {}
  episode_reward_max: 9.850000000000001
  episode_reward_mean: -0.27339999999997544
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 2
  episodes_total: 1750
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.749110844400194
          entropy_coeff: 0.009999999999999998
          kl: 0.00972357545661587
          policy_loss: -0.05655689239501953
          total_loss: 0.09684262379176087
          vf_explained_var: 0.8566310405731201
          vf_loss: 0.1544663308809201
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 603000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,603,18144.3,603000,-0.2734,9.85,-14.79,316.86




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-10-22_00-12-40
  done: false
  episode_len_mean: 297.72
  episode_media: {}
  episode_reward_max: 9.850000000000001
  episode_reward_mean: 0.4104000000000228
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 9
  episodes_total: 1759
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.700043307410346
          entropy_coeff: 0.009999999999999998
          kl: 0.011652154781375811
          policy_loss: -0.06222221470541424
          total_loss: 0.42668062473336854
          vf_explained_var: 0.873824417591095
          vf_loss: 0.4862213822702567
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 604000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,604,18327.1,604000,0.4104,9.85,-14.79,297.72




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-10-22_00-15-10
  done: false
  episode_len_mean: 274.87
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 1.2533000000000214
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 7
  episodes_total: 1766
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.5388513061735365
          entropy_coeff: 0.009999999999999998
          kl: 0.012719715070556938
          policy_loss: -0.08358174926704831
          total_loss: 0.38766068075266147
          vf_explained_var: 0.8715565800666809
          vf_loss: 0.46514581232849095
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 60500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,605,18477.8,605000,1.2533,9.87,-14.79,274.87




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-10-22_00-17-52
  done: false
  episode_len_mean: 252.71
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 2.1841000000000204
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 9
  episodes_total: 1775
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.4745124366548326
          entropy_coeff: 0.009999999999999998
          kl: 0.008393230433716268
          policy_loss: -0.03472464291585816
          total_loss: 0.16961167264315818
          vf_explained_var: 0.9413489699363708
          vf_loss: 0.20490426479114426
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 60600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,606,18639.4,606000,2.1841,9.87,-14.79,252.71




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-10-22_00-18-57
  done: false
  episode_len_mean: 245.6
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 2.4480000000000195
  episode_reward_min: -14.790000000000054
  episodes_this_iter: 4
  episodes_total: 1779
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.693574419286516
          entropy_coeff: 0.009999999999999998
          kl: 0.010615992864847603
          policy_loss: -0.07200325586729579
          total_loss: 0.10165073018934992
          vf_explained_var: 0.8613227605819702
          vf_loss: 0.1726580451346106
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 607000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,607,18704,607000,2.448,9.87,-14.79,245.6




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-10-22_00-21-21
  done: false
  episode_len_mean: 226.75
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 3.2714000000000185
  episode_reward_min: -10.869999999999957
  episodes_this_iter: 8
  episodes_total: 1787
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6515039934052362
          entropy_coeff: 0.009999999999999998
          kl: 0.010014383860565997
          policy_loss: -0.05847271250353919
          total_loss: 0.2080266492234336
          vf_explained_var: 0.9613874554634094
          vf_loss: 0.2660989056030909
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 608000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,608,18848,608000,3.2714,9.87,-10.87,226.75




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-10-22_00-22-42
  done: false
  episode_len_mean: 222.09
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 3.4096000000000184
  episode_reward_min: -10.869999999999957
  episodes_this_iter: 4
  episodes_total: 1791
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.5847404374016656
          entropy_coeff: 0.009999999999999998
          kl: 0.008964979305238588
          policy_loss: 0.007583998930123117
          total_loss: 0.3461668787731065
          vf_explained_var: 0.9301505088806152
          vf_loss: 0.33928735703229906
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 609000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,609,18929.2,609000,3.4096,9.87,-10.87,222.09




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-10-22_00-24-46
  done: false
  episode_len_mean: 213.29
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 3.6672000000000167
  episode_reward_min: -10.869999999999957
  episodes_this_iter: 7
  episodes_total: 1798
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.5616029249297247
          entropy_coeff: 0.009999999999999998
          kl: 0.010068189954770348
          policy_loss: -0.032351880644758545
          total_loss: 0.2653903265380197
          vf_explained_var: 0.9226712584495544
          vf_loss: 0.2963518560760551
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 610000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,610,19052.9,610000,3.6672,9.87,-10.87,213.29




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-10-22_00-30-40
  done: false
  episode_len_mean: 164.06
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 5.159600000000015
  episode_reward_min: -9.459999999999944
  episodes_this_iter: 17
  episodes_total: 1815
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.3177789171536765
          entropy_coeff: 0.009999999999999998
          kl: 0.010629057561068992
          policy_loss: -0.025745101935333674
          total_loss: 0.6383928507566452
          vf_explained_var: 0.9124143123626709
          vf_loss: 0.6593619843324026
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 611000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,611,19407.3,611000,5.1596,9.87,-9.46,164.06




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-10-22_00-36-01
  done: false
  episode_len_mean: 135.57
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 6.049200000000011
  episode_reward_min: -3.899999999999965
  episodes_this_iter: 15
  episodes_total: 1830
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.2691343506177266
          entropy_coeff: 0.009999999999999998
          kl: 0.009565055221492575
          policy_loss: -0.15284000519249175
          total_loss: 0.4983591707216369
          vf_explained_var: 0.9326359033584595
          vf_loss: 0.6477339926693174
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 612000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,612,19728.1,612000,6.0492,9.87,-3.9,135.57




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-10-22_00-38-28
  done: false
  episode_len_mean: 137.97
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 6.093700000000012
  episode_reward_min: -3.899999999999965
  episodes_this_iter: 8
  episodes_total: 1838
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.5242449733946057
          entropy_coeff: 0.009999999999999998
          kl: 0.008786871040677054
          policy_loss: -0.01813535276386473
          total_loss: 0.32201309899489083
          vf_explained_var: 0.9007323384284973
          vf_loss: 0.34054881350861654
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 613000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,613,19875.3,613000,6.0937,9.87,-3.9,137.97




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-10-22_00-42-12
  done: false
  episode_len_mean: 113.41
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 6.836000000000012
  episode_reward_min: -3.899999999999965
  episodes_this_iter: 11
  episodes_total: 1849
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.4362152285046048
          entropy_coeff: 0.009999999999999998
          kl: 0.010327762628220298
          policy_loss: -0.12154533490538597
          total_loss: 0.5136065772010221
          vf_explained_var: 0.9099442958831787
          vf_loss: 0.6320692333910201
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 614000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,614,20099.2,614000,6.836,9.87,-3.9,113.41




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-10-22_00-44-08
  done: false
  episode_len_mean: 111.89
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 6.9863000000000115
  episode_reward_min: -3.899999999999965
  episodes_this_iter: 7
  episodes_total: 1856
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.3509185983075036
          entropy_coeff: 0.009999999999999998
          kl: 0.006151170335345392
          policy_loss: 0.00865279449046486
          total_loss: 0.11727501182920402
          vf_explained_var: 0.9024975895881653
          vf_loss: 0.11174133927561343
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,615,20215.5,615000,6.9863,9.87,-3.9,111.89




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-10-22_00-46-21
  done: false
  episode_len_mean: 113.65
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 6.978400000000011
  episode_reward_min: -3.899999999999965
  episodes_this_iter: 8
  episodes_total: 1864
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.5819291651248932
          entropy_coeff: 0.009999999999999998
          kl: 0.008633010656661591
          policy_loss: 0.04210949486328496
          total_loss: 0.3107933384883735
          vf_explained_var: 0.8777355551719666
          vf_loss: 0.26992094384299387
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,616,20347.9,616000,6.9784,9.9,-3.9,113.65




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-10-22_00-53-40
  done: false
  episode_len_mean: 94.5
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 7.43390000000001
  episode_reward_min: -3.899999999999965
  episodes_this_iter: 21
  episodes_total: 1885
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.3392702394061617
          entropy_coeff: 0.009999999999999998
          kl: 0.012154671448067856
          policy_loss: -0.09562496989965438
          total_loss: 0.45768886531392733
          vf_explained_var: 0.9168016314506531
          vf_loss: 0.5461758391724693
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 617000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,617,20787.4,617000,7.4339,9.9,-3.9,94.5




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-10-22_01-01-55
  done: false
  episode_len_mean: 74.02
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 8.033600000000009
  episode_reward_min: -1.9099999999999644
  episodes_this_iter: 24
  episodes_total: 1909
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.2796900272369385
          entropy_coeff: 0.009999999999999998
          kl: 0.007038090403101529
          policy_loss: -0.1141086774567763
          total_loss: 0.3502018527024322
          vf_explained_var: 0.9359392523765564
          vf_loss: 0.46521925131479896
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 618000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,618,21281.9,618000,8.0336,9.9,-1.91,74.02




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-10-22_01-08-22
  done: false
  episode_len_mean: 73.83
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 8.06450000000001
  episode_reward_min: -1.9099999999999644
  episodes_this_iter: 20
  episodes_total: 1929
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.2026768114831712
          entropy_coeff: 0.009999999999999998
          kl: 0.007891654050174158
          policy_loss: -0.11984435518582662
          total_loss: 0.18829616771803961
          vf_explained_var: 0.9656645655632019
          vf_loss: 0.30683733804358376
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 619000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,619,21669.4,619000,8.0645,9.9,-1.91,73.83




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-10-22_01-10-08
  done: false
  episode_len_mean: 73.03
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 8.043900000000008
  episode_reward_min: -1.9099999999999644
  episodes_this_iter: 6
  episodes_total: 1935
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6799057642618815
          entropy_coeff: 0.009999999999999998
          kl: 0.01138592692747251
          policy_loss: -0.08968873963587815
          total_loss: 0.18422626629471778
          vf_explained_var: 0.9227951169013977
          vf_loss: 0.2714818651477496
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,620,21774.5,620000,8.0439,9.9,-1.91,73.03




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-10-22_01-13-29
  done: false
  episode_len_mean: 73.48
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 8.019500000000011
  episode_reward_min: -2.199999999999989
  episodes_this_iter: 11
  episodes_total: 1946
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.328982338640425
          entropy_coeff: 0.009999999999999998
          kl: 0.00826768868888238
          policy_loss: 0.030382520291540357
          total_loss: 0.2874292659262816
          vf_explained_var: 0.9307875633239746
          vf_loss: 0.2563714522454474
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 621000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,621,21976,621000,8.0195,9.9,-2.2,73.48




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-10-22_01-17-36
  done: false
  episode_len_mean: 64.17
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 8.183300000000008
  episode_reward_min: -2.199999999999989
  episodes_this_iter: 13
  episodes_total: 1959
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.397108915117052
          entropy_coeff: 0.009999999999999998
          kl: 0.007132829326498009
          policy_loss: 0.05226166215207842
          total_loss: 0.24469319780667623
          vf_explained_var: 0.9021660685539246
          vf_loss: 0.1943544179201126
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,622,22222.9,622000,8.1833,9.9,-2.2,64.17




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-10-22_01-22-18
  done: false
  episode_len_mean: 65.09
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 8.084400000000006
  episode_reward_min: -2.2699999999999854
  episodes_this_iter: 14
  episodes_total: 1973
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.2007998612191941
          entropy_coeff: 0.009999999999999998
          kl: 0.008961715561102047
          policy_loss: -0.008728638208574719
          total_loss: 0.40374419076575174
          vf_explained_var: 0.8441107869148254
          vf_loss: 0.4093434178166919
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,623,22505.2,623000,8.0844,9.88,-2.27,65.09




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-10-22_01-26-10
  done: false
  episode_len_mean: 70.2
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 7.897300000000009
  episode_reward_min: -7.539999999999937
  episodes_this_iter: 12
  episodes_total: 1985
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.6290888574388291
          entropy_coeff: 0.009999999999999998
          kl: 0.015197655950262975
          policy_loss: 0.04019417237076495
          total_loss: 0.3950015595803658
          vf_explained_var: 0.9085731506347656
          vf_loss: 0.34542760211560464
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 624000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,624,22736.4,624000,7.8973,9.88,-7.54,70.2




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-10-22_01-32-43
  done: false
  episode_len_mean: 72.95
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 7.869200000000009
  episode_reward_min: -7.539999999999937
  episodes_this_iter: 20
  episodes_total: 2005
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.2996943540043302
          entropy_coeff: 0.009999999999999998
          kl: 0.008062008563856398
          policy_loss: 0.06468590224782626
          total_loss: 0.6279495692915387
          vf_explained_var: 0.9481797814369202
          vf_loss: 0.5626429029636912
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 625000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,625,23129.5,625000,7.8692,9.88,-7.54,72.95




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-10-22_01-37-36
  done: false
  episode_len_mean: 75.89
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 7.820900000000008
  episode_reward_min: -7.539999999999937
  episodes_this_iter: 14
  episodes_total: 2019
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.3726669192314147
          entropy_coeff: 0.009999999999999998
          kl: 0.008875355373897678
          policy_loss: 0.11083278213110236
          total_loss: 0.34513319954276084
          vf_explained_var: 0.9451344609260559
          vf_loss: 0.23303554471996096
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 626000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,626,23422.9,626000,7.8209,9.88,-7.54,75.89




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-10-22_01-46-44
  done: false
  episode_len_mean: 57.06
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 8.407400000000006
  episode_reward_min: -7.539999999999937
  episodes_this_iter: 28
  episodes_total: 2047
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.2487392518255445
          entropy_coeff: 0.009999999999999998
          kl: 0.007295473351145344
          policy_loss: -0.2579962133533425
          total_loss: 0.17941801005767452
          vf_explained_var: 0.9547567367553711
          vf_loss: 0.43757868309815723
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,627,23971.2,627000,8.4074,9.88,-7.54,57.06




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-10-22_01-53-30
  done: false
  episode_len_mean: 56.14
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 8.506700000000006
  episode_reward_min: -7.539999999999937
  episodes_this_iter: 21
  episodes_total: 2068
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.5424119287066989
          entropy_coeff: 0.009999999999999998
          kl: 0.007263647771051228
          policy_loss: 0.1355286568403244
          total_loss: 0.35600035029153027
          vf_explained_var: 0.9663116335868835
          vf_loss: 0.22362663592729304
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 628000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,628,24376.9,628000,8.5067,9.87,-7.54,56.14




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-10-22_01-58-27
  done: false
  episode_len_mean: 47.43
  episode_media: {}
  episode_reward_max: 9.870000000000001
  episode_reward_mean: 8.809500000000003
  episode_reward_min: -0.22999999999996085
  episodes_this_iter: 15
  episodes_total: 2083
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.4067807820108202
          entropy_coeff: 0.009999999999999998
          kl: 0.009624400646534663
          policy_loss: -0.04412259426381853
          total_loss: 0.3286592723594772
          vf_explained_var: 0.9524074792861938
          vf_loss: 0.3705929042564498
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,629,24673.6,629000,8.8095,9.87,-0.23,47.43




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-10-22_02-08-14
  done: false
  episode_len_mean: 41.79
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 8.995100000000006
  episode_reward_min: -0.22999999999996085
  episodes_this_iter: 30
  episodes_total: 2113
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.332741958565182
          entropy_coeff: 0.009999999999999998
          kl: 0.007139835428435445
          policy_loss: 0.04047258579068714
          total_loss: 0.4303689956665039
          vf_explained_var: 0.9593889117240906
          vf_loss: 0.3911637917160988
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 630000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,630,25260.8,630000,8.9951,9.89,-0.23,41.79




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-10-22_02-16-19
  done: false
  episode_len_mean: 44.93
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 8.924900000000006
  episode_reward_min: -0.22999999999996085
  episodes_this_iter: 25
  episodes_total: 2138
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.1771917833222283
          entropy_coeff: 0.009999999999999998
          kl: 0.0069554188440556905
          policy_loss: 0.009556945537527402
          total_loss: 0.2831556381450759
          vf_explained_var: 0.9606853723526001
          vf_loss: 0.2736220748888122
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,631,25745.4,631000,8.9249,9.89,-0.23,44.93




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-10-22_02-21-28
  done: false
  episode_len_mean: 47.43
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 8.800200000000006
  episode_reward_min: -2.0899999999999492
  episodes_this_iter: 16
  episodes_total: 2154
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.4485694845517476
          entropy_coeff: 0.009999999999999998
          kl: 0.006741258337095256
          policy_loss: 0.032092429531945126
          total_loss: 0.4736613094806671
          vf_explained_var: 0.9401871562004089
          vf_loss: 0.4446677769223849
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,632,26055.1,632000,8.8002,9.89,-2.09,47.43




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-10-22_02-27-25
  done: false
  episode_len_mean: 48.99
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 8.775700000000006
  episode_reward_min: -2.0899999999999492
  episodes_this_iter: 18
  episodes_total: 2172
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.5371317969428169
          entropy_coeff: 0.009999999999999998
          kl: 0.011219626516634563
          policy_loss: -0.1568142768409517
          total_loss: 0.24676498158110513
          vf_explained_var: 0.8443039059638977
          vf_loss: 0.39999927083651227
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,633,26411.8,633000,8.7757,9.89,-2.09,48.99




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-10-22_02-32-45
  done: false
  episode_len_mean: 46.84
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 8.914400000000008
  episode_reward_min: -2.0899999999999492
  episodes_this_iter: 16
  episodes_total: 2188
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.455291990439097
          entropy_coeff: 0.009999999999999998
          kl: 0.009950690611177078
          policy_loss: -0.07328153875552945
          total_loss: 0.430066628050473
          vf_explained_var: 0.9204579591751099
          vf_loss: 0.5010931899150213
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 634000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,634,26731.1,634000,8.9144,9.89,-2.09,46.84




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-10-22_02-40-54
  done: false
  episode_len_mean: 49.92
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 8.823300000000007
  episode_reward_min: -2.0899999999999492
  episodes_this_iter: 25
  episodes_total: 2213
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.132205193572574
          entropy_coeff: 0.009999999999999998
          kl: 0.007030227300779796
          policy_loss: 0.0008696401077840063
          total_loss: 0.355818246346381
          vf_explained_var: 0.9589000940322876
          vf_loss: 0.35439575877454543
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,635,27220.1,635000,8.8233,9.88,-2.09,49.92




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-10-22_02-50-17
  done: false
  episode_len_mean: 44.33
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 8.958300000000005
  episode_reward_min: -2.0899999999999492
  episodes_this_iter: 28
  episodes_total: 2241
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.1314803812238905
          entropy_coeff: 0.009999999999999998
          kl: 0.0054460223246711025
          policy_loss: -0.11688912784059842
          total_loss: 0.3057043980807066
          vf_explained_var: 0.9589693546295166
          vf_loss: 0.4247093414266904
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,636,27783.9,636000,8.9583,9.88,-2.09,44.33




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-10-22_02-56-59
  done: false
  episode_len_mean: 47.18
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 8.990200000000007
  episode_reward_min: -0.6599999999999868
  episodes_this_iter: 21
  episodes_total: 2262
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.2733125938309564
          entropy_coeff: 0.009999999999999998
          kl: 0.008103870100549986
          policy_loss: -0.03323927033278677
          total_loss: 0.3010784011955063
          vf_explained_var: 0.959050178527832
          vf_loss: 0.3333623907632298
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,637,28185.6,637000,8.9902,9.88,-0.66,47.18




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-10-22_03-03-04
  done: false
  episode_len_mean: 47.57
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 8.926900000000005
  episode_reward_min: -2.129999999999974
  episodes_this_iter: 19
  episodes_total: 2281
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.406743840376536
          entropy_coeff: 0.009999999999999998
          kl: 0.007785010763654802
          policy_loss: -0.022783192329936558
          total_loss: 0.258462587164508
          vf_explained_var: 0.8911638259887695
          vf_loss: 0.2821633965190914
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,638,28550.1,638000,8.9269,9.88,-2.13,47.57




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-10-22_03-11-45
  done: false
  episode_len_mean: 41.55
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 9.075500000000003
  episode_reward_min: -2.129999999999974
  episodes_this_iter: 27
  episodes_total: 2308
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.3397853281762866
          entropy_coeff: 0.009999999999999998
          kl: 0.009260838349718832
          policy_loss: 0.030177855491638185
          total_loss: 0.37174542653891773
          vf_explained_var: 0.9565302729606628
          vf_loss: 0.3393227549062835
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,639,29071.4,639000,9.0755,9.88,-2.13,41.55




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-10-22_03-23-24
  done: false
  episode_len_mean: 37.98
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 9.138900000000003
  episode_reward_min: -2.129999999999974
  episodes_this_iter: 35
  episodes_total: 2343
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.1438952154583402
          entropy_coeff: 0.009999999999999998
          kl: 0.006139850219054008
          policy_loss: 0.030946958975659477
          total_loss: 0.3759477436542511
          vf_explained_var: 0.9626505374908447
          vf_loss: 0.34606879336966406
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,640,29770.1,640000,9.1389,9.88,-2.13,37.98




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-10-22_03-35-02
  done: false
  episode_len_mean: 35.86
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 9.200400000000004
  episode_reward_min: -2.129999999999974
  episodes_this_iter: 35
  episodes_total: 2378
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.1392703652381897
          entropy_coeff: 0.009999999999999998
          kl: 0.005959825533184216
          policy_loss: -0.05556810936994023
          total_loss: 0.17774049693511593
          vf_explained_var: 0.9781795740127563
          vf_loss: 0.23463444776005216
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 641000
  iterations_



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,641,30468.1,641000,9.2004,9.89,-2.13,35.86


Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-10-22_03-48-38
  done: false
  episode_len_mean: 27.37
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 9.4817
  episode_reward_min: 2.9300000000000406
  episodes_this_iter: 41
  episodes_total: 2419
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6891201342848923
          cur_lr: 5.000000000000001e-05
          entropy: 1.0860453989770678
          entropy_coeff: 0.009999999999999998
          kl: 0.00422571706716469
          policy_loss: 0.03053279436296887
          total_loss: 0.2156438780327638
          vf_explained_var: 0.9830191135406494
          vf_loss: 0.1888337971435653
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 642000
  iterations_since_restore: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,642,31284.7,642000,9.4817,9.9,2.93,27.37




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-10-22_04-01-12
  done: false
  episode_len_mean: 26.59
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 9.499200000000002
  episode_reward_min: 4.390000000000011
  episodes_this_iter: 38
  episodes_total: 2457
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.1881545371479458
          entropy_coeff: 0.009999999999999998
          kl: 0.00953362854984879
          policy_loss: -0.00642642734779252
          total_loss: 0.22895587506807513
          vf_explained_var: 0.9809329509735107
          vf_loss: 0.23921212769216962
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 643000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,643,32038.2,643000,9.4992,9.9,4.39,26.59




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-10-22_04-12-32
  done: false
  episode_len_mean: 25.76
  episode_media: {}
  episode_reward_max: 9.9
  episode_reward_mean: 9.547100000000002
  episode_reward_min: 4.390000000000011
  episodes_this_iter: 34
  episodes_total: 2491
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.110697857538859
          entropy_coeff: 0.009999999999999998
          kl: 0.00732211839182663
          policy_loss: -0.13012778013944626
          total_loss: 0.08588521778583527
          vf_explained_var: 0.9822896122932434
          vf_loss: 0.22093600614203346
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 644000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,644,32718.2,644000,9.5471,9.9,4.39,25.76




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-10-22_04-17-57
  done: false
  episode_len_mean: 31.27
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 9.422700000000004
  episode_reward_min: 2.1900000000000794
  episodes_this_iter: 17
  episodes_total: 2508
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 0.9914806167284648
          entropy_coeff: 0.009999999999999998
          kl: 0.013670982332408401
          policy_loss: 0.0608132016327646
          total_loss: 0.25819269286261665
          vf_explained_var: 0.9584059715270996
          vf_loss: 0.19574832618236543
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 645000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,645,33043.6,645000,9.4227,9.88,2.19,31.27




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-10-22_04-31-49
  done: false
  episode_len_mean: 31.31
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 9.471200000000003
  episode_reward_min: 2.1900000000000794
  episodes_this_iter: 42
  episodes_total: 2550
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.0504484626981947
          entropy_coeff: 0.009999999999999998
          kl: 0.007407324714228138
          policy_loss: -0.03591293882992533
          total_loss: 0.1688719785047902
          vf_explained_var: 0.9795944094657898
          vf_loss: 0.20903347043527498
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 646000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,646,33875.1,646000,9.4712,9.89,2.19,31.31




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-10-22_04-42-39
  done: false
  episode_len_mean: 33.43
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 9.420200000000005
  episode_reward_min: 2.1900000000000794
  episodes_this_iter: 33
  episodes_total: 2583
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.2273651281992595
          entropy_coeff: 0.009999999999999998
          kl: 0.013770955944040361
          policy_loss: 0.05088536325428221
          total_loss: 0.17977653137511676
          vf_explained_var: 0.8972699642181396
          vf_loss: 0.12953442194395595
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 647000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,647,34525,647000,9.4202,9.89,2.19,33.43




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-10-22_04-53-04
  done: false
  episode_len_mean: 28.14
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 9.541500000000003
  episode_reward_min: -0.10999999999996321
  episodes_this_iter: 32
  episodes_total: 2615
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.1860440095265707
          entropy_coeff: 0.009999999999999998
          kl: 0.014050241382622196
          policy_loss: 0.05074766779111491
          total_loss: 0.29236667801936467
          vf_explained_var: 0.9755855798721313
          vf_loss: 0.24161317265695995
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 648000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,648,35149.8,648000,9.5415,9.89,-0.11,28.14




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 649000
  custom_metrics: {}
  date: 2021-10-22_05-02-31
  done: false
  episode_len_mean: 31.77
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 9.496200000000004
  episode_reward_min: -0.10999999999996321
  episodes_this_iter: 29
  episodes_total: 2644
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.278764463795556
          entropy_coeff: 0.009999999999999998
          kl: 0.009586500075925061
          policy_loss: 0.0460032203545173
          total_loss: 0.27883441783487795
          vf_explained_var: 0.8316453099250793
          vf_loss: 0.23752246871590615
    num_agent_steps_sampled: 649000
    num_agent_steps_trained: 649000
    num_steps_sampled: 649000
    num_steps_trained: 649000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,649,35717.1,649000,9.4962,9.92,-0.11,31.77




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 650000
  custom_metrics: {}
  date: 2021-10-22_05-10-55
  done: false
  episode_len_mean: 32.73
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 9.516000000000004
  episode_reward_min: -0.10999999999996321
  episodes_this_iter: 25
  episodes_total: 2669
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 0.8760995341671838
          entropy_coeff: 0.009999999999999998
          kl: 0.007965059220867054
          policy_loss: 0.05004816858304872
          total_loss: 0.1610892424152957
          vf_explained_var: 0.7483657598495483
          vf_loss: 0.11307509826082322
    num_agent_steps_sampled: 650000
    num_agent_steps_trained: 650000
    num_steps_sampled: 650000
    num_steps_trained: 650000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,650,36221.7,650000,9.516,9.92,-0.11,32.73




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 651000
  custom_metrics: {}
  date: 2021-10-22_05-18-02
  done: false
  episode_len_mean: 38.35
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 9.381400000000003
  episode_reward_min: -4.129999999999926
  episodes_this_iter: 22
  episodes_total: 2691
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.3545977228217654
          entropy_coeff: 0.009999999999999998
          kl: 0.009769410075311116
          policy_loss: 0.08141325389345487
          total_loss: 0.3010880122996039
          vf_explained_var: 0.5593178868293762
          vf_loss: 0.22496988204721774
    num_agent_steps_sampled: 651000
    num_agent_steps_trained: 651000
    num_steps_sampled: 651000
    num_steps_trained: 651000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,651,36648.2,651000,9.3814,9.92,-4.13,38.35




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 652000
  custom_metrics: {}
  date: 2021-10-22_05-28-30
  done: false
  episode_len_mean: 34.72
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 9.516800000000003
  episode_reward_min: -4.129999999999926
  episodes_this_iter: 31
  episodes_total: 2722
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 1.0352092213100856
          entropy_coeff: 0.009999999999999998
          kl: 0.010059717126059967
          policy_loss: -0.017102098133828906
          total_loss: 0.4467578956650363
          vf_explained_var: 0.9703674912452698
          vf_loss: 0.4657160555322965
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_steps_sampled: 652000
    num_steps_trained: 652000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,652,37275.9,652000,9.5168,9.92,-4.13,34.72




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 653000
  custom_metrics: {}
  date: 2021-10-22_05-42-03
  done: false
  episode_len_mean: 31.13
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 9.543000000000005
  episode_reward_min: -4.129999999999926
  episodes_this_iter: 42
  episodes_total: 2764
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8445600671424461
          cur_lr: 5.000000000000001e-05
          entropy: 0.8316127293639712
          entropy_coeff: 0.009999999999999998
          kl: 0.02715366996582465
          policy_loss: 0.019435849454667833
          total_loss: 0.32188892662525176
          vf_explained_var: 0.9759241938591003
          vf_loss: 0.2878363059212764
    num_agent_steps_sampled: 653000
    num_agent_steps_trained: 653000
    num_steps_sampled: 653000
    num_steps_trained: 653000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,653,38089.5,653000,9.543,9.91,-4.13,31.13




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 654000
  custom_metrics: {}
  date: 2021-10-22_05-54-46
  done: false
  episode_len_mean: 25.1
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 9.681900000000004
  episode_reward_min: -2.1899999999999733
  episodes_this_iter: 38
  episodes_total: 2802
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2668401007136691
          cur_lr: 5.000000000000001e-05
          entropy: 0.8744705379009247
          entropy_coeff: 0.009999999999999998
          kl: 0.0038224250129465175
          policy_loss: -0.19393511331743665
          total_loss: -0.10533341252141529
          vf_explained_var: 0.9912302494049072
          vf_loss: 0.09250400521688991
    num_agent_steps_sampled: 654000
    num_agent_steps_trained: 654000
    num_steps_sampled: 654000
    num_steps_trained: 654000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,654,38852.5,654000,9.6819,9.92,-2.19,25.1




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 655000
  custom_metrics: {}
  date: 2021-10-22_06-04-36
  done: false
  episode_len_mean: 23.02
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 9.721400000000001
  episode_reward_min: 2.440000000000074
  episodes_this_iter: 30
  episodes_total: 2832
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6334200503568346
          cur_lr: 5.000000000000001e-05
          entropy: 1.060004633002811
          entropy_coeff: 0.009999999999999998
          kl: 0.01194406770237898
          policy_loss: -0.07710387259721756
          total_loss: 0.14763955908517043
          vf_explained_var: 0.9861479997634888
          vf_loss: 0.227777866481079
    num_agent_steps_sampled: 655000
    num_agent_steps_trained: 655000
    num_steps_sampled: 655000
    num_steps_trained: 655000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,655,39441.6,655000,9.7214,9.92,2.44,23.02




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 656000
  custom_metrics: {}
  date: 2021-10-22_06-17-54
  done: false
  episode_len_mean: 27.92
  episode_media: {}
  episode_reward_max: 9.92
  episode_reward_mean: 9.632900000000003
  episode_reward_min: -3.0999999999999552
  episodes_this_iter: 41
  episodes_total: 2873
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6334200503568346
          cur_lr: 5.000000000000001e-05
          entropy: 0.8239151047335731
          entropy_coeff: 0.009999999999999998
          kl: 0.010390150010047665
          policy_loss: 0.09625570492611991
          total_loss: 0.1149227480093638
          vf_explained_var: 0.9965494871139526
          vf_loss: 0.02032486089091334
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_steps_sampled: 656000
    num_steps_trained: 656000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,656,40239.7,656000,9.6329,9.92,-3.1,27.92




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 657000
  custom_metrics: {}
  date: 2021-10-22_06-36-21
  done: false
  episode_len_mean: 24.17
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 9.719700000000001
  episode_reward_min: -3.0999999999999552
  episodes_this_iter: 56
  episodes_total: 2929
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6334200503568346
          cur_lr: 5.000000000000001e-05
          entropy: 0.7637151731385126
          entropy_coeff: 0.009999999999999998
          kl: 0.004888338383689719
          policy_loss: 0.007555161830451754
          total_loss: 0.0683435980644491
          vf_explained_var: 0.9935985207557678
          vf_loss: 0.06532921737266911
    num_agent_steps_sampled: 657000
    num_agent_steps_trained: 657000
    num_steps_sampled: 657000
    num_steps_trained: 657000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,657,41346.6,657000,9.7197,9.91,-3.1,24.17




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 658000
  custom_metrics: {}
  date: 2021-10-22_06-46-09
  done: false
  episode_len_mean: 22.67
  episode_media: {}
  episode_reward_max: 9.920000000000002
  episode_reward_mean: 9.736
  episode_reward_min: 2.05000000000008
  episodes_this_iter: 30
  episodes_total: 2959
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3167100251784173
          cur_lr: 5.000000000000001e-05
          entropy: 1.3358636524942187
          entropy_coeff: 0.009999999999999998
          kl: 0.025103837365192463
          policy_loss: 0.05715130666891734
          total_loss: 0.21674030708769956
          vf_explained_var: 0.904036283493042
          vf_loss: 0.16499700190292466
    num_agent_steps_sampled: 658000
    num_agent_steps_trained: 658000
    num_steps_sampled: 658000
    num_steps_trained: 658000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,658,41935,658000,9.736,9.92,2.05,22.67




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 659000
  custom_metrics: {}
  date: 2021-10-22_06-57-18
  done: false
  episode_len_mean: 22.74
  episode_media: {}
  episode_reward_max: 9.920000000000002
  episode_reward_mean: 9.705800000000002
  episode_reward_min: 2.05000000000008
  episodes_this_iter: 33
  episodes_total: 2992
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47506503776762615
          cur_lr: 5.000000000000001e-05
          entropy: 0.9982755124568939
          entropy_coeff: 0.009999999999999998
          kl: 0.021209225270797008
          policy_loss: -0.03061943170097139
          total_loss: 0.1851247446404563
          vf_explained_var: 0.9814234375953674
          vf_loss: 0.21565116834309367
    num_agent_steps_sampled: 659000
    num_agent_steps_trained: 659000
    num_steps_sampled: 659000
    num_steps_trained: 659000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,659,42604.1,659000,9.7058,9.92,2.05,22.74




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 660000
  custom_metrics: {}
  date: 2021-10-22_07-05-34
  done: false
  episode_len_mean: 32.12
  episode_media: {}
  episode_reward_max: 9.920000000000002
  episode_reward_mean: 9.469200000000003
  episode_reward_min: -0.6299999999999408
  episodes_this_iter: 26
  episodes_total: 3018
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7125975566514392
          cur_lr: 5.000000000000001e-05
          entropy: 1.0845109323660533
          entropy_coeff: 0.009999999999999998
          kl: 0.007419203132967514
          policy_loss: -0.16277889421002734
          total_loss: -0.0016262662079599168
          vf_explained_var: 0.9854551553726196
          vf_loss: 0.16671083230111333
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_steps_sampled: 660000
    num_steps_trained: 660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,660,43099.7,660000,9.4692,9.92,-0.63,32.12




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 661000
  custom_metrics: {}
  date: 2021-10-22_07-23-10
  done: false
  episode_len_mean: 27.73
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 9.570900000000002
  episode_reward_min: -0.6299999999999408
  episodes_this_iter: 53
  episodes_total: 3071
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7125975566514392
          cur_lr: 5.000000000000001e-05
          entropy: 0.7566842337449392
          entropy_coeff: 0.009999999999999998
          kl: 0.006314049733166232
          policy_loss: -0.03515816488199764
          total_loss: 0.02894158818655544
          vf_explained_var: 0.9939906597137451
          vf_loss: 0.06716721991284026
    num_agent_steps_sampled: 661000
    num_agent_steps_trained: 661000
    num_steps_sampled: 661000
    num_steps_trained: 661000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,661,44155.6,661000,9.5709,9.91,-0.63,27.73




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 662000
  custom_metrics: {}
  date: 2021-10-22_07-41-59
  done: false
  episode_len_mean: 18.25
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 9.828300000000002
  episode_reward_min: 8.790000000000003
  episodes_this_iter: 57
  episodes_total: 3128
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7125975566514392
          cur_lr: 5.000000000000001e-05
          entropy: 0.733229876226849
          entropy_coeff: 0.009999999999999998
          kl: 0.006789631043932667
          policy_loss: -0.017061716690659524
          total_loss: 0.032740483929713564
          vf_explained_var: 0.9951433539390564
          vf_loss: 0.052296226678623096
    num_agent_steps_sampled: 662000
    num_agent_steps_trained: 662000
    num_steps_sampled: 662000
    num_steps_trained: 662000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,662,45285,662000,9.8283,9.91,8.79,18.25




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 663000
  custom_metrics: {}
  date: 2021-10-22_07-59-58
  done: false
  episode_len_mean: 17.8
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 9.813600000000001
  episode_reward_min: 7.680000000000005
  episodes_this_iter: 54
  episodes_total: 3182
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7125975566514392
          cur_lr: 5.000000000000001e-05
          entropy: 0.7216582192314995
          entropy_coeff: 0.009999999999999998
          kl: 0.006407803821678714
          policy_loss: -0.012524190462297864
          total_loss: 0.09642268700732125
          vf_explained_var: 0.9898824691772461
          vf_loss: 0.11159727869348393
    num_agent_steps_sampled: 663000
    num_agent_steps_trained: 663000
    num_steps_sampled: 663000
    num_steps_trained: 663000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,663,46363.7,663000,9.8136,9.91,7.68,17.8




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 664000
  custom_metrics: {}
  date: 2021-10-22_08-17-33
  done: false
  episode_len_mean: 18.61
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 9.765500000000003
  episode_reward_min: 7.680000000000005
  episodes_this_iter: 53
  episodes_total: 3235
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7125975566514392
          cur_lr: 5.000000000000001e-05
          entropy: 0.7512330035368602
          entropy_coeff: 0.009999999999999998
          kl: 0.005789155934714726
          policy_loss: 0.015682384454541735
          total_loss: 0.1572984381682343
          vf_explained_var: 0.9873506426811218
          vf_loss: 0.1450030432186193
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_steps_sampled: 664000
    num_steps_trained: 664000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,664,47419.1,664000,9.7655,9.91,7.68,18.61




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 665000
  custom_metrics: {}
  date: 2021-10-22_08-35-33
  done: false
  episode_len_mean: 18.93
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 9.693800000000001
  episode_reward_min: 3.3500000000000085
  episodes_this_iter: 54
  episodes_total: 3289
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7125975566514392
          cur_lr: 5.000000000000001e-05
          entropy: 0.7897934973239898
          entropy_coeff: 0.009999999999999998
          kl: 0.007035060853624763
          policy_loss: -0.044890537692440884
          total_loss: 0.09121894902653165
          vf_explained_var: 0.99031001329422
          vf_loss: 0.13899425755565364
    num_agent_steps_sampled: 665000
    num_agent_steps_trained: 665000
    num_steps_sampled: 665000
    num_steps_trained: 665000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,665,48499,665000,9.6938,9.91,3.35,18.93




Result for PPO_my_env_0b57e_00000:
  agent_timesteps_total: 666000
  custom_metrics: {}
  date: 2021-10-22_08-54-28
  done: false
  episode_len_mean: 18.16
  episode_media: {}
  episode_reward_max: 9.91
  episode_reward_mean: 9.740700000000002
  episode_reward_min: 3.3500000000000085
  episodes_this_iter: 57
  episodes_total: 3346
  experiment_id: e6d1265ae3d54b7b8f850cfb5dadc7e7
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7125975566514392
          cur_lr: 5.000000000000001e-05
          entropy: 0.73597548339102
          entropy_coeff: 0.009999999999999998
          kl: 0.006207044876574308
          policy_loss: -0.0427838691820701
          total_loss: 0.007410994379056825
          vf_explained_var: 0.995331883430481
          vf_loss: 0.053131493512127134
    num_agent_steps_sampled: 666000
    num_agent_steps_trained: 666000
    num_steps_sampled: 666000
    num_steps_trained: 666000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_0b57e_00000,RUNNING,192.168.3.5:11962,666,49634.1,666000,9.7407,9.91,3.35,18.16


Process _WandbLoggingProcess-1:
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/integration/wandb.py", line 200, in run
    result = self.queue.get()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/miniconda/en

KeyboardInterrupt: 